//===-- X86InstrFragmentsSIMD.td - x86 SIMD ISA ------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides pattern fragments useful for SIMD instructions.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// MMX specific DAG Nodes.
//===----------------------------------------------------------------------===//

// Low word of MMX to GPR: takes one x86mmx operand and yields an i32.
def MMX_X86movd2w
    : SDNode<"X86ISD::MMX_MOVD2W",
             SDTypeProfile<1, 1, [SDTCisVT<0, i32>, SDTCisVT<1, x86mmx>]>>;

// GPR to low word of MMX: takes one i32 operand and yields an x86mmx value.
def MMX_X86movw2d
    : SDNode<"X86ISD::MMX_MOVW2D",
             SDTypeProfile<1, 1, [SDTCisVT<0, x86mmx>, SDTCisVT<1, i32>]>>;

//===----------------------------------------------------------------------===//
// MMX Pattern Fragments
//===----------------------------------------------------------------------===//

// A plain load whose result type is x86mmx.
def load_mmx
    : PatFrag<(ops node:$ptr), (x86mmx (load node:$ptr))>;

// A load whose result is fed through MMX_X86movw2d to produce an x86mmx value.
def load_mvmmx
    : PatFrag<(ops node:$ptr), (x86mmx (MMX_X86movw2d (load node:$ptr)))>;

// Bitconvert of an arbitrary value to x86mmx.
def bc_mmx
    : PatFrag<(ops node:$in), (x86mmx (bitconvert node:$in))>;

//===----------------------------------------------------------------------===//
// SSE specific DAG Nodes.
36//===----------------------------------------------------------------------===// 37 38def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>, 39 SDTCisFP<0>, SDTCisInt<2> ]>; 40def SDTX86VFCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, 41 SDTCisFP<1>, SDTCisVT<3, i8>, 42 SDTCisVec<1>]>; 43 44def X86umin : SDNode<"X86ISD::UMIN", SDTIntBinOp>; 45def X86umax : SDNode<"X86ISD::UMAX", SDTIntBinOp>; 46def X86smin : SDNode<"X86ISD::SMIN", SDTIntBinOp>; 47def X86smax : SDNode<"X86ISD::SMAX", SDTIntBinOp>; 48 49def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>; 50def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>; 51 52// Commutative and Associative FMIN and FMAX. 53def X86fminc : SDNode<"X86ISD::FMINC", SDTFPBinOp, 54 [SDNPCommutative, SDNPAssociative]>; 55def X86fmaxc : SDNode<"X86ISD::FMAXC", SDTFPBinOp, 56 [SDNPCommutative, SDNPAssociative]>; 57 58def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp, 59 [SDNPCommutative, SDNPAssociative]>; 60def X86for : SDNode<"X86ISD::FOR", SDTFPBinOp, 61 [SDNPCommutative, SDNPAssociative]>; 62def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp, 63 [SDNPCommutative, SDNPAssociative]>; 64def X86fandn : SDNode<"X86ISD::FANDN", SDTFPBinOp, 65 [SDNPCommutative, SDNPAssociative]>; 66def X86frsqrt : SDNode<"X86ISD::FRSQRT", SDTFPUnaryOp>; 67def X86frcp : SDNode<"X86ISD::FRCP", SDTFPUnaryOp>; 68def X86fsrl : SDNode<"X86ISD::FSRL", SDTX86FPShiftOp>; 69def X86fgetsign: SDNode<"X86ISD::FGETSIGNx86",SDTFPToIntOp>; 70def X86fhadd : SDNode<"X86ISD::FHADD", SDTFPBinOp>; 71def X86fhsub : SDNode<"X86ISD::FHSUB", SDTFPBinOp>; 72def X86hadd : SDNode<"X86ISD::HADD", SDTIntBinOp>; 73def X86hsub : SDNode<"X86ISD::HSUB", SDTIntBinOp>; 74def X86comi : SDNode<"X86ISD::COMI", SDTX86CmpTest>; 75def X86ucomi : SDNode<"X86ISD::UCOMI", SDTX86CmpTest>; 76def X86cmps : SDNode<"X86ISD::FSETCC", SDTX86Cmps>; 77//def X86cmpsd : SDNode<"X86ISD::FSETCCsd", SDTX86Cmpsd>; 78def X86pshufb : SDNode<"X86ISD::PSHUFB", 79 SDTypeProfile<1, 2, [SDTCisVec<0>, 
SDTCisSameAs<0,1>, 80 SDTCisSameAs<0,2>]>>; 81def X86andnp : SDNode<"X86ISD::ANDNP", 82 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 83 SDTCisSameAs<0,2>]>>; 84def X86psign : SDNode<"X86ISD::PSIGN", 85 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 86 SDTCisSameAs<0,2>]>>; 87def X86pextrb : SDNode<"X86ISD::PEXTRB", 88 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; 89def X86pextrw : SDNode<"X86ISD::PEXTRW", 90 SDTypeProfile<1, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<2>]>>; 91def X86pinsrb : SDNode<"X86ISD::PINSRB", 92 SDTypeProfile<1, 3, [SDTCisVT<0, v16i8>, SDTCisSameAs<0,1>, 93 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; 94def X86pinsrw : SDNode<"X86ISD::PINSRW", 95 SDTypeProfile<1, 3, [SDTCisVT<0, v8i16>, SDTCisSameAs<0,1>, 96 SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>; 97def X86insertps : SDNode<"X86ISD::INSERTPS", 98 SDTypeProfile<1, 3, [SDTCisVT<0, v4f32>, SDTCisSameAs<0,1>, 99 SDTCisVT<2, v4f32>, SDTCisVT<3, i8>]>>; 100def X86vzmovl : SDNode<"X86ISD::VZEXT_MOVL", 101 SDTypeProfile<1, 1, [SDTCisSameAs<0,1>]>>; 102 103def X86vzload : SDNode<"X86ISD::VZEXT_LOAD", SDTLoad, 104 [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>; 105 106def X86vzext : SDNode<"X86ISD::VZEXT", 107 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 108 SDTCisInt<0>, SDTCisInt<1>, 109 SDTCisOpSmallerThanOp<1, 0>]>>; 110 111def X86vsext : SDNode<"X86ISD::VSEXT", 112 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 113 SDTCisInt<0>, SDTCisInt<1>, 114 SDTCisOpSmallerThanOp<1, 0>]>>; 115 116def X86vtrunc : SDNode<"X86ISD::VTRUNC", 117 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 118 SDTCisInt<0>, SDTCisInt<1>, 119 SDTCisOpSmallerThanOp<0, 1>]>>; 120def X86trunc : SDNode<"X86ISD::TRUNC", 121 SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>, 122 SDTCisOpSmallerThanOp<0, 1>]>>; 123 124def X86vtruncm : SDNode<"X86ISD::VTRUNCM", 125 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, 126 SDTCisInt<0>, SDTCisInt<1>, 127 SDTCisVec<2>, SDTCisInt<2>, 128 SDTCisOpSmallerThanOp<0, 2>]>>; 129def 
X86vfpext : SDNode<"X86ISD::VFPEXT", 130 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 131 SDTCisFP<0>, SDTCisFP<1>, 132 SDTCisOpSmallerThanOp<1, 0>]>>; 133def X86vfpround: SDNode<"X86ISD::VFPROUND", 134 SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>, 135 SDTCisFP<0>, SDTCisFP<1>, 136 SDTCisOpSmallerThanOp<0, 1>]>>; 137 138def X86vshldq : SDNode<"X86ISD::VSHLDQ", SDTIntShiftOp>; 139def X86vshrdq : SDNode<"X86ISD::VSRLDQ", SDTIntShiftOp>; 140def X86cmpp : SDNode<"X86ISD::CMPP", SDTX86VFCMP>; 141def X86pcmpeq : SDNode<"X86ISD::PCMPEQ", SDTIntBinOp, [SDNPCommutative]>; 142def X86pcmpgt : SDNode<"X86ISD::PCMPGT", SDTIntBinOp>; 143 144def X86IntCmpMask : SDTypeProfile<1, 2, 145 [SDTCisVec<0>, SDTCisSameAs<1, 2>, SDTCisInt<1>]>; 146def X86pcmpeqm : SDNode<"X86ISD::PCMPEQM", X86IntCmpMask, [SDNPCommutative]>; 147def X86pcmpgtm : SDNode<"X86ISD::PCMPGTM", X86IntCmpMask>; 148 149def X86CmpMaskCC : 150 SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<0>, SDTCisVec<1>, 151 SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; 152def X86CmpMaskCCScalar : 153 SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, SDTCisVT<3, i8>]>; 154 155def X86cmpm : SDNode<"X86ISD::CMPM", X86CmpMaskCC>; 156def X86cmpmu : SDNode<"X86ISD::CMPMU", X86CmpMaskCC>; 157def X86cmpms : SDNode<"X86ISD::FSETCC", X86CmpMaskCCScalar>; 158 159def X86vshl : SDNode<"X86ISD::VSHL", 160 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 161 SDTCisVec<2>]>>; 162def X86vsrl : SDNode<"X86ISD::VSRL", 163 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 164 SDTCisVec<2>]>>; 165def X86vsra : SDNode<"X86ISD::VSRA", 166 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 167 SDTCisVec<2>]>>; 168 169def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>; 170def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>; 171def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>; 172 173def SDTX86CmpPTest : SDTypeProfile<1, 2, [SDTCisVT<0, i32>, 174 SDTCisVec<1>, 175 SDTCisSameAs<2, 1>]>; 176def X86subus : SDNode<"X86ISD::SUBUS", 
SDTIntBinOp>; 177def X86ptest : SDNode<"X86ISD::PTEST", SDTX86CmpPTest>; 178def X86testp : SDNode<"X86ISD::TESTP", SDTX86CmpPTest>; 179def X86kortest : SDNode<"X86ISD::KORTEST", SDTX86CmpPTest>; 180def X86testm : SDNode<"X86ISD::TESTM", SDTypeProfile<1, 2, [SDTCisVec<0>, 181 SDTCisVec<1>, 182 SDTCisSameAs<2, 1>]>>; 183def X86testnm : SDNode<"X86ISD::TESTNM", SDTypeProfile<1, 2, [SDTCisVec<0>, 184 SDTCisVec<1>, 185 SDTCisSameAs<2, 1>]>>; 186def X86select : SDNode<"X86ISD::SELECT" , SDTSelect>; 187 188def X86pmuludq : SDNode<"X86ISD::PMULUDQ", 189 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, 190 SDTCisSameAs<1,2>]>>; 191def X86pmuldq : SDNode<"X86ISD::PMULDQ", 192 SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, 193 SDTCisSameAs<1,2>]>>; 194 195// Specific shuffle nodes - At some point ISD::VECTOR_SHUFFLE will always get 196// translated into one of the target nodes below during lowering. 197// Note: this is a work in progress... 198def SDTShuff1Op : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; 199def SDTShuff2Op : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 200 SDTCisSameAs<0,2>]>; 201def SDTShuff3Op : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 202 SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>; 203 204def SDTShuff2OpM : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, 205 SDTCisVec<2>]>; 206def SDTShuff2OpI : SDTypeProfile<1, 2, [SDTCisVec<0>, 207 SDTCisSameAs<0,1>, SDTCisInt<2>]>; 208def SDTShuff3OpI : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 209 SDTCisSameAs<0,2>, SDTCisInt<3>]>; 210 211def SDTVBroadcast : SDTypeProfile<1, 1, [SDTCisVec<0>]>; 212def SDTVBroadcastm : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>]>; 213 214def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, 215 SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>; 216 217def SDTFPBinOpRound : SDTypeProfile<1, 3, [ // fadd_round, fmul_round, etc. 
218 SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>; 219 220def SDTFma : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, 221 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>]>; 222def SDTFmaRound : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, 223 SDTCisSameAs<1,2>, SDTCisSameAs<1,3>, SDTCisInt<4>]>; 224def STDFp1SrcRm : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, 225 SDTCisVec<0>, SDTCisInt<2>]>; 226def STDFp2SrcRm : SDTypeProfile<1, 3, [SDTCisSameAs<0,1>, 227 SDTCisVec<0>, SDTCisInt<3>]>; 228def STDFp3SrcRm : SDTypeProfile<1, 4, [SDTCisSameAs<0,1>, 229 SDTCisVec<0>, SDTCisInt<3>, SDTCisInt<4>]>; 230 231def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; 232def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; 233 234def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; 235def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; 236def X86PShuflw : SDNode<"X86ISD::PSHUFLW", SDTShuff2OpI>; 237 238def X86Shufp : SDNode<"X86ISD::SHUFP", SDTShuff3OpI>; 239 240def X86Movddup : SDNode<"X86ISD::MOVDDUP", SDTShuff1Op>; 241def X86Movshdup : SDNode<"X86ISD::MOVSHDUP", SDTShuff1Op>; 242def X86Movsldup : SDNode<"X86ISD::MOVSLDUP", SDTShuff1Op>; 243 244def X86Movsd : SDNode<"X86ISD::MOVSD", SDTShuff2Op>; 245def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>; 246 247def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>; 248def X86Movlhpd : SDNode<"X86ISD::MOVLHPD", SDTShuff2Op>; 249def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; 250 251def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; 252def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; 253 254def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; 255def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; 256def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; 257 258def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; 259def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; 260 261def X86VPermilpv : SDNode<"X86ISD::VPERMILPV", SDTShuff2OpM>; 262def X86VPermilpi : 
SDNode<"X86ISD::VPERMILPI", SDTShuff2OpI>; 263def X86VPermv : SDNode<"X86ISD::VPERMV", SDTShuff2Op>; 264def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>; 265def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>; 266def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>; 267 268def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>; 269 270def X86VBroadcast : SDNode<"X86ISD::VBROADCAST", SDTVBroadcast>; 271def X86VBroadcastm : SDNode<"X86ISD::VBROADCASTM", SDTVBroadcastm>; 272def X86Vinsert : SDNode<"X86ISD::VINSERT", SDTypeProfile<1, 3, 273 [SDTCisSameAs<0, 1>, SDTCisPtrTy<3>]>, []>; 274def X86Vextract : SDNode<"X86ISD::VEXTRACT", SDTypeProfile<1, 2, 275 [SDTCisVec<1>, SDTCisPtrTy<2>]>, []>; 276 277def X86Blendi : SDNode<"X86ISD::BLENDI", SDTBlend>; 278 279def X86Addsub : SDNode<"X86ISD::ADDSUB", SDTFPBinOp>; 280 281def X86faddRnd : SDNode<"X86ISD::FADD_RND", SDTFPBinOpRound>; 282def X86fsubRnd : SDNode<"X86ISD::FSUB_RND", SDTFPBinOpRound>; 283def X86fmulRnd : SDNode<"X86ISD::FMUL_RND", SDTFPBinOpRound>; 284def X86fdivRnd : SDNode<"X86ISD::FDIV_RND", SDTFPBinOpRound>; 285def X86fmaxRnd : SDNode<"X86ISD::FMAX", SDTFPBinOpRound>; 286def X86fminRnd : SDNode<"X86ISD::FMIN", SDTFPBinOpRound>; 287 288def X86Fmadd : SDNode<"X86ISD::FMADD", SDTFma>; 289def X86Fnmadd : SDNode<"X86ISD::FNMADD", SDTFma>; 290def X86Fmsub : SDNode<"X86ISD::FMSUB", SDTFma>; 291def X86Fnmsub : SDNode<"X86ISD::FNMSUB", SDTFma>; 292def X86Fmaddsub : SDNode<"X86ISD::FMADDSUB", SDTFma>; 293def X86Fmsubadd : SDNode<"X86ISD::FMSUBADD", SDTFma>; 294 295def X86FmaddRnd : SDNode<"X86ISD::FMADD_RND", SDTFmaRound>; 296def X86FnmaddRnd : SDNode<"X86ISD::FNMADD_RND", SDTFmaRound>; 297def X86FmsubRnd : SDNode<"X86ISD::FMSUB_RND", SDTFmaRound>; 298def X86FnmsubRnd : SDNode<"X86ISD::FNMSUB_RND", SDTFmaRound>; 299def X86FmaddsubRnd : SDNode<"X86ISD::FMADDSUB_RND", SDTFmaRound>; 300def X86FmsubaddRnd : SDNode<"X86ISD::FMSUBADD_RND", SDTFmaRound>; 301 302def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", 
STDFp1SrcRm>; 303def X86rcp28 : SDNode<"X86ISD::RCP28", STDFp1SrcRm>; 304def X86exp2 : SDNode<"X86ISD::EXP2", STDFp1SrcRm>; 305 306def X86rsqrt28s : SDNode<"X86ISD::RSQRT28", STDFp2SrcRm>; 307def X86rcp28s : SDNode<"X86ISD::RCP28", STDFp2SrcRm>; 308def X86RndScale : SDNode<"X86ISD::RNDSCALE", STDFp3SrcRm>; 309 310def SDT_PCMPISTRI : SDTypeProfile<2, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, 311 SDTCisVT<2, v16i8>, SDTCisVT<3, v16i8>, 312 SDTCisVT<4, i8>]>; 313def SDT_PCMPESTRI : SDTypeProfile<2, 5, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, 314 SDTCisVT<2, v16i8>, SDTCisVT<3, i32>, 315 SDTCisVT<4, v16i8>, SDTCisVT<5, i32>, 316 SDTCisVT<6, i8>]>; 317 318def X86pcmpistri : SDNode<"X86ISD::PCMPISTRI", SDT_PCMPISTRI>; 319def X86pcmpestri : SDNode<"X86ISD::PCMPESTRI", SDT_PCMPESTRI>; 320 321def X86compress: SDNode<"X86ISD::COMPRESS", SDTypeProfile<1, 3, 322 [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, 323 SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; 324def X86expand : SDNode<"X86ISD::EXPAND", SDTypeProfile<1, 3, 325 [SDTCisSameAs<0, 3>, 326 SDTCisVec<3>, SDTCisVec<1>, SDTCisInt<1>]>, []>; 327 328//===----------------------------------------------------------------------===// 329// SSE Complex Patterns 330//===----------------------------------------------------------------------===// 331 332// These are 'extloads' from a scalar to the low element of a vector, zeroing 333// the top elements. These are used for the SSE 'ss' and 'sd' instruction 334// forms. 
def sse_load_f32
    : ComplexPattern<v4f32, 5, "SelectScalarSSELoad", [],
                     [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
                      SDNPWantRoot]>;
def sse_load_f64
    : ComplexPattern<v2f64, 5, "SelectScalarSSELoad", [],
                     [SDNPHasChain, SDNPMayLoad, SDNPMemOperand,
                      SDNPWantRoot]>;

// Memory operand of type v4f32, printed with printf32mem and parsed as a
// 32-bit memory operand.
def ssmem : Operand<v4f32> {
  let OperandType = "OPERAND_MEMORY";
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
  let PrintMethod = "printf32mem";
  let ParserMatchClass = X86Mem32AsmOperand;
}
// Memory operand of type v2f64, printed with printf64mem and parsed as a
// 64-bit memory operand.
def sdmem : Operand<v2f64> {
  let OperandType = "OPERAND_MEMORY";
  let MIOperandInfo = (ops ptr_rc, i8imm, ptr_rc_nosp, i32imm, i8imm);
  let PrintMethod = "printf64mem";
  let ParserMatchClass = X86Mem64AsmOperand;
}

//===----------------------------------------------------------------------===//
// SSE pattern fragments
//===----------------------------------------------------------------------===//

// 128-bit load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def loadv4f32
    : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
def loadv2f64
    : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
def loadv2i64
    : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;

// 256-bit load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
def loadv8f32
    : PatFrag<(ops node:$ptr), (v8f32 (load node:$ptr))>;
def loadv4f64
    : PatFrag<(ops node:$ptr), (v4f64 (load node:$ptr))>;
def loadv4i64
    : PatFrag<(ops node:$ptr), (v4i64 (load node:$ptr))>;

// 512-bit load pattern fragments
def loadv16f32
    : PatFrag<(ops node:$ptr), (v16f32 (load node:$ptr))>;
def loadv8f64
    : PatFrag<(ops node:$ptr), (v8f64 (load node:$ptr))>;
def loadv64i8
    : PatFrag<(ops node:$ptr), (v64i8 (load node:$ptr))>;
def loadv32i16
    : PatFrag<(ops node:$ptr), (v32i16 (load node:$ptr))>;
def loadv16i32
    : PatFrag<(ops node:$ptr), (v16i32 (load node:$ptr))>;
def loadv8i64
    : PatFrag<(ops node:$ptr), (v8i64 (load node:$ptr))>;

// 128-/256-/512-bit extload pattern fragments
def extloadv2f32
    : PatFrag<(ops node:$ptr), (v2f64 (extloadvf32 node:$ptr))>;
def extloadv4f32
    : PatFrag<(ops node:$ptr), (v4f64 (extloadvf32 node:$ptr))>;
def extloadv8f32
    : PatFrag<(ops node:$ptr), (v8f64 (extloadvf32 node:$ptr))>;

// These are needed to match a scalar load that is used in a vector-only
// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def loadf32_128
    : PatFrag<(ops node:$ptr),
              (f32 (vector_extract (loadv4f32 node:$ptr), (iPTR 0)))>;
def loadf64_128
    : PatFrag<(ops node:$ptr),
              (f64 (vector_extract (loadv2f64 node:$ptr), (iPTR 0)))>;

// Like 'store', but always requires 128-bit vector alignment.
def alignedstore
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 16;
}]>;

// Like 'store', but always requires 256-bit vector alignment.
def alignedstore256
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 32;
}]>;

// Like 'store', but always requires 512-bit vector alignment.
def alignedstore512
    : PatFrag<(ops node:$val, node:$ptr), (store node:$val, node:$ptr), [{
  const auto *St = cast<StoreSDNode>(N);
  return St->getAlignment() >= 64;
}]>;

// Like 'load', but always requires 128-bit vector alignment.
def alignedload
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 16;
}]>;

// Like 'X86vzload', but always requires 128-bit vector alignment.
def alignedX86vzload
    : PatFrag<(ops node:$ptr), (X86vzload node:$ptr), [{
  const auto *Mem = cast<MemSDNode>(N);
  return Mem->getAlignment() >= 16;
}]>;

// Like 'load', but always requires 256-bit vector alignment.
def alignedload256
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 32;
}]>;

// Like 'load', but always requires 512-bit vector alignment.
def alignedload512
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 64;
}]>;

// Scalar-typed wrappers around 'alignedload'.
def alignedloadfsf32
    : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
def alignedloadfsf64
    : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;

// 128-bit aligned load pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def alignedloadv4f32
    : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
def alignedloadv2f64
    : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
def alignedloadv2i64
    : PatFrag<(ops node:$ptr), (v2i64 (alignedload node:$ptr))>;

// 256-bit aligned load pattern fragments
// NOTE: all 256-bit integer vector loads are promoted to v4i64
def alignedloadv8f32
    : PatFrag<(ops node:$ptr), (v8f32 (alignedload256 node:$ptr))>;
def alignedloadv4f64
    : PatFrag<(ops node:$ptr), (v4f64 (alignedload256 node:$ptr))>;
def alignedloadv4i64
    : PatFrag<(ops node:$ptr), (v4i64 (alignedload256 node:$ptr))>;

// 512-bit aligned load pattern fragments
def alignedloadv16f32
    : PatFrag<(ops node:$ptr), (v16f32 (alignedload512 node:$ptr))>;
def alignedloadv16i32
    : PatFrag<(ops node:$ptr), (v16i32 (alignedload512 node:$ptr))>;
def alignedloadv8f64
    : PatFrag<(ops node:$ptr), (v8f64 (alignedload512 node:$ptr))>;
def alignedloadv8i64
    : PatFrag<(ops node:$ptr), (v8i64 (alignedload512 node:$ptr))>;

// Like 'load', but uses special alignment checks suitable for use in
// memory operands in most SSE instructions, which are required to
// be naturally aligned on some targets but not on others.  If the subtarget
// allows unaligned accesses, match any load, though this may require
// setting a feature bit in the processor (on startup, for example).
// Opteron 10h and later implement such a feature.
def memop
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  // Subtargets that tolerate unaligned SSE memory operands accept any load.
  if (Subtarget->hasSSEUnalignedMem())
    return true;
  return cast<LoadSDNode>(N)->getAlignment() >= 16;
}]>;

def memopfsf32
    : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
def memopfsf64
    : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;

// 128-bit memop pattern fragments
// NOTE: all 128-bit integer vector loads are promoted to v2i64
def memopv4f32
    : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
def memopv2f64
    : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
def memopv2i64
    : PatFrag<(ops node:$ptr), (v2i64 (memop node:$ptr))>;

// These are needed to match a scalar memop that is used in a vector-only
// math instruction such as the FP logical ops: andps, andnps, orps, xorps.
// The memory operand is required to be a 128-bit load, so it must be converted
// from a vector to a scalar.
def memopfsf32_128
    : PatFrag<(ops node:$ptr),
              (f32 (vector_extract (memopv4f32 node:$ptr), (iPTR 0)))>;
def memopfsf64_128
    : PatFrag<(ops node:$ptr),
              (f64 (vector_extract (memopv2f64 node:$ptr), (iPTR 0)))>;


// SSSE3 uses MMX registers for some instructions. They aren't aligned on a
// 16-byte boundary.
// FIXME: 8 byte alignment for mmx reads is not required
def memop64
    : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const auto *Ld = cast<LoadSDNode>(N);
  return Ld->getAlignment() >= 8;
}]>;

def memopmmx
    : PatFrag<(ops node:$ptr), (x86mmx (memop64 node:$ptr))>;

// MOVNT Support
// Like 'store', but requires the non-temporal bit to be set
def nontemporalstore
    : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{
  const auto *ST = dyn_cast<StoreSDNode>(N);
  return ST && ST->isNonTemporal();
}]>;

// Non-temporal, non-truncating, unindexed store with at least 16-byte
// alignment.
def alignednontemporalstore
    : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{
  const auto *ST = dyn_cast<StoreSDNode>(N);
  if (!ST)
    return false;
  return ST->isNonTemporal() && !ST->isTruncatingStore() &&
         ST->getAddressingMode() == ISD::UNINDEXED &&
         ST->getAlignment() >= 16;
}]>;

// Non-temporal store whose alignment is below 16 bytes.
def unalignednontemporalstore
    : PatFrag<(ops node:$val, node:$ptr), (st node:$val, node:$ptr), [{
  const auto *ST = dyn_cast<StoreSDNode>(N);
  if (!ST)
    return false;
  return ST->isNonTemporal() && ST->getAlignment() < 16;
}]>;

// 128-bit bitconvert pattern fragments
def bc_v4f32
    : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64
    : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8
    : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
def bc_v8i16
    : PatFrag<(ops node:$in), (v8i16 (bitconvert node:$in))>;
def bc_v4i32
    : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>;
def bc_v2i64
    : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>;

// 256-bit bitconvert pattern fragments
def bc_v32i8
    : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>;
def bc_v16i16
    : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>;
def bc_v8i32
    : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>;
def bc_v4i64
    : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>;
def bc_v8f32
    : PatFrag<(ops node:$in), (v8f32 (bitconvert node:$in))>;

// 512-bit bitconvert pattern fragments
def bc_v16i32
    : PatFrag<(ops node:$in), (v16i32 (bitconvert node:$in))>;
def bc_v8i64
    : PatFrag<(ops node:$in), (v8i64 (bitconvert node:$in))>;
def bc_v8f64
    : PatFrag<(ops node:$in), (v8f64 (bitconvert node:$in))>;
def bc_v16f32
    : PatFrag<(ops node:$in), (v16f32 (bitconvert node:$in))>;

// Bitconvert of an X86vzmovl applied to a scalar load placed in a vector.
def vzmovl_v2i64
    : PatFrag<(ops node:$src),
              (bitconvert (v2i64 (X86vzmovl
                (v2i64 (scalar_to_vector (loadi64 node:$src))))))>;
def vzmovl_v4i32
    : PatFrag<(ops node:$src),
              (bitconvert (v4i32 (X86vzmovl
                (v4i32 (scalar_to_vector (loadi32 node:$src))))))>;

def vzload_v2i64
    : PatFrag<(ops node:$src),
              (bitconvert (v2i64 (X86vzload node:$src)))>;


// Matches an f32 immediate that is exactly +0.0.
def fp32imm0 : PatLeaf<(f32 fpimm), [{
  return N->isExactlyValue(+0.0);
}]>;

def I8Imm : SDNodeXForm<imm, [{
  // Transformation function: get the low 8 bits.
  return getI8Imm(static_cast<uint8_t>(N->getZExtValue()));
}]>;

def FROUND_NO_EXC : ImmLeaf<i32, [{ return Imm == 8; }]>;
def FROUND_CURRENT : ImmLeaf<i32, [{
  return Imm == X86::STATIC_ROUNDING::CUR_DIRECTION;
}]>;

// BYTE_imm - Transform bit immediates into byte immediates.
def BYTE_imm : SDNodeXForm<imm, [{
  // Transformation function: imm >> 3
  return getI32Imm(N->getZExtValue() >> 3);
}]>;

// EXTRACT_get_vextract128_imm xform function: convert extract_subvector index
// to VEXTRACTF128/VEXTRACTI128 imm.
def EXTRACT_get_vextract128_imm : SDNodeXForm<extract_subvector, [{
  return getI8Imm(X86::getExtractVEXTRACT128Immediate(N));
}]>;

// INSERT_get_vinsert128_imm xform function: convert insert_subvector index to
// VINSERTF128/VINSERTI128 imm.
def INSERT_get_vinsert128_imm : SDNodeXForm<insert_subvector, [{
  return getI8Imm(X86::getInsertVINSERT128Immediate(N));
}]>;

// EXTRACT_get_vextract256_imm xform function: convert extract_subvector index
// to VEXTRACTF64x4 imm.
def EXTRACT_get_vextract256_imm : SDNodeXForm<extract_subvector, [{
  return getI8Imm(X86::getExtractVEXTRACT256Immediate(N));
}]>;

// INSERT_get_vinsert256_imm xform function: convert insert_subvector index to
// VINSERTF64x4 imm.
def INSERT_get_vinsert256_imm : SDNodeXForm<insert_subvector, [{
  return getI8Imm(X86::getInsertVINSERT256Immediate(N));
}]>;

// extract_subvector accepted by X86::isVEXTRACT128Index; the index is
// rewritten by EXTRACT_get_vextract128_imm.
def vextract128_extract
    : PatFrag<(ops node:$bigvec, node:$index),
              (extract_subvector node:$bigvec, node:$index), [{
  return X86::isVEXTRACT128Index(N);
}], EXTRACT_get_vextract128_imm>;

// insert_subvector accepted by X86::isVINSERT128Index; the index is rewritten
// by INSERT_get_vinsert128_imm.
def vinsert128_insert
    : PatFrag<(ops node:$bigvec, node:$smallvec, node:$index),
              (insert_subvector node:$bigvec, node:$smallvec, node:$index), [{
  return X86::isVINSERT128Index(N);
}], INSERT_get_vinsert128_imm>;


// extract_subvector accepted by X86::isVEXTRACT256Index; the index is
// rewritten by EXTRACT_get_vextract256_imm.
def vextract256_extract
    : PatFrag<(ops node:$bigvec, node:$index),
              (extract_subvector node:$bigvec, node:$index), [{
  return X86::isVEXTRACT256Index(N);
}], EXTRACT_get_vextract256_imm>;

// insert_subvector accepted by X86::isVINSERT256Index; the index is rewritten
// by INSERT_get_vinsert256_imm.
def vinsert256_insert
    : PatFrag<(ops node:$bigvec, node:$smallvec, node:$index),
              (insert_subvector node:$bigvec, node:$smallvec, node:$index), [{
  return X86::isVINSERT256Index(N);
}], INSERT_get_vinsert256_imm>;

// Masked load whose alignment is at least 16 bytes.
def masked_load_aligned128
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_load node:$src1, node:$src2, node:$src3), [{
  const auto *Load = dyn_cast<MaskedLoadSDNode>(N);
  return Load && Load->getAlignment() >= 16;
}]>;

// Masked load whose alignment is at least 32 bytes.
def masked_load_aligned256
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_load node:$src1, node:$src2, node:$src3), [{
  const auto *Load = dyn_cast<MaskedLoadSDNode>(N);
  return Load && Load->getAlignment() >= 32;
}]>;

// Masked load whose alignment is at least 64 bytes.
def masked_load_aligned512
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_load node:$src1, node:$src2, node:$src3), [{
  const auto *Load = dyn_cast<MaskedLoadSDNode>(N);
  return Load && Load->getAlignment() >= 64;
}]>;

// Any masked load, with no alignment requirement.
def masked_load_unaligned
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_load node:$src1, node:$src2, node:$src3), [{
  return isa<MaskedLoadSDNode>(N);
}]>;

// Masked store whose alignment is at least 16 bytes.
def masked_store_aligned128
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_store node:$src1, node:$src2, node:$src3), [{
  const auto *Store = dyn_cast<MaskedStoreSDNode>(N);
  return Store && Store->getAlignment() >= 16;
}]>;

// Masked store whose alignment is at least 32 bytes.
def masked_store_aligned256
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_store node:$src1, node:$src2, node:$src3), [{
  const auto *Store = dyn_cast<MaskedStoreSDNode>(N);
  return Store && Store->getAlignment() >= 32;
}]>;

// Masked store whose alignment is at least 64 bytes.
def masked_store_aligned512
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_store node:$src1, node:$src2, node:$src3), [{
  const auto *Store = dyn_cast<MaskedStoreSDNode>(N);
  return Store && Store->getAlignment() >= 64;
}]>;

// Any masked store, with no alignment requirement.
def masked_store_unaligned
    : PatFrag<(ops node:$src1, node:$src2, node:$src3),
              (masked_store node:$src1, node:$src2, node:$src3), [{
  return isa<MaskedStoreSDNode>(N);
}]>;
