//===-- X86InstrSSE.td - SSE Instruction Set ---------------*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the X86 SSE instruction set, defining the instructions,
// and properties of the instructions which are needed for code generation,
// machine code emission, and analysis.
//
//===----------------------------------------------------------------------===//

class OpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  // InstrSchedModel info.
  X86FoldableSchedWrite Sched = WriteFAdd;
}

class SizeItins<OpndItins arg_s, OpndItins arg_d> {
  OpndItins s = arg_s;
  OpndItins d = arg_d;
}


class ShiftOpndItins<InstrItinClass arg_rr, InstrItinClass arg_rm,
                     InstrItinClass arg_ri> {
  InstrItinClass rr = arg_rr;
  InstrItinClass rm = arg_rm;
  InstrItinClass ri = arg_ri;
}


// scalar
let Sched = WriteFAdd in {
def SSE_ALU_F32S : OpndItins<
  IIC_SSE_ALU_F32S_RR, IIC_SSE_ALU_F32S_RM
>;

def SSE_ALU_F64S : OpndItins<
  IIC_SSE_ALU_F64S_RR, IIC_SSE_ALU_F64S_RM
>;
}

def SSE_ALU_ITINS_S : SizeItins<
  SSE_ALU_F32S, SSE_ALU_F64S
>;

let Sched = WriteFMul in {
def SSE_MUL_F32S : OpndItins<
  IIC_SSE_MUL_F32S_RR, IIC_SSE_MUL_F32S_RM
>;

def SSE_MUL_F64S : OpndItins<
  IIC_SSE_MUL_F64S_RR, IIC_SSE_MUL_F64S_RM
>;
}

def SSE_MUL_ITINS_S : SizeItins<
  SSE_MUL_F32S, SSE_MUL_F64S
>;

let Sched = WriteFDiv in {
def SSE_DIV_F32S : OpndItins<
  IIC_SSE_DIV_F32S_RR, IIC_SSE_DIV_F32S_RM
>;

def SSE_DIV_F64S : OpndItins<
  IIC_SSE_DIV_F64S_RR, IIC_SSE_DIV_F64S_RM
>;
}

def SSE_DIV_ITINS_S : SizeItins<
  SSE_DIV_F32S, SSE_DIV_F64S
>;

// parallel
let Sched = WriteFAdd in {
def SSE_ALU_F32P : OpndItins<
  IIC_SSE_ALU_F32P_RR, IIC_SSE_ALU_F32P_RM
>;

def SSE_ALU_F64P : OpndItins<
  IIC_SSE_ALU_F64P_RR, IIC_SSE_ALU_F64P_RM
>;
}

def SSE_ALU_ITINS_P : SizeItins<
  SSE_ALU_F32P, SSE_ALU_F64P
>;

let Sched = WriteFMul in {
def SSE_MUL_F32P : OpndItins<
  IIC_SSE_MUL_F32P_RR, IIC_SSE_MUL_F32P_RM
>;

def SSE_MUL_F64P : OpndItins<
  IIC_SSE_MUL_F64P_RR, IIC_SSE_MUL_F64P_RM
>;
}

def SSE_MUL_ITINS_P : SizeItins<
  SSE_MUL_F32P, SSE_MUL_F64P
>;

let Sched = WriteFDiv in {
def SSE_DIV_F32P : OpndItins<
  IIC_SSE_DIV_F32P_RR, IIC_SSE_DIV_F32P_RM
>;

def SSE_DIV_F64P : OpndItins<
  IIC_SSE_DIV_F64P_RR, IIC_SSE_DIV_F64P_RM
>;
}

def SSE_DIV_ITINS_P : SizeItins<
  SSE_DIV_F32P, SSE_DIV_F64P
>;

let Sched = WriteVecLogic in
def SSE_VEC_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

def SSE_BIT_ITINS_P : OpndItins<
  IIC_SSE_BIT_P_RR, IIC_SSE_BIT_P_RM
>;

let Sched = WriteVecALU in {
def SSE_INTALU_ITINS_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

def SSE_INTALUQ_ITINS_P : OpndItins<
  IIC_SSE_INTALUQ_P_RR, IIC_SSE_INTALUQ_P_RM
>;
}

let Sched = WriteVecIMul in
def SSE_INTMUL_ITINS_P : OpndItins<
  IIC_SSE_INTMUL_P_RR, IIC_SSE_INTMUL_P_RM
>;

def SSE_INTSHIFT_ITINS_P : ShiftOpndItins<
  IIC_SSE_INTSH_P_RR, IIC_SSE_INTSH_P_RM, IIC_SSE_INTSH_P_RI
>;

def SSE_MOVA_ITINS : OpndItins<
  IIC_SSE_MOVA_P_RR, IIC_SSE_MOVA_P_RM
>;

def SSE_MOVU_ITINS : OpndItins<
  IIC_SSE_MOVU_P_RR, IIC_SSE_MOVU_P_RM
>;

def SSE_DPPD_ITINS : OpndItins<
  IIC_SSE_DPPD_RR, IIC_SSE_DPPD_RM
>;

def SSE_DPPS_ITINS : OpndItins<
  IIC_SSE_DPPS_RR, IIC_SSE_DPPS_RM
>;

def DEFAULT_ITINS : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

def SSE_EXTRACT_ITINS : OpndItins<
  IIC_SSE_EXTRACTPS_RR, IIC_SSE_EXTRACTPS_RM
>;

def SSE_INSERT_ITINS : OpndItins<
  IIC_SSE_INSERTPS_RR, IIC_SSE_INSERTPS_RM
>;

let Sched = WriteMPSAD in
def SSE_MPSADBW_ITINS : OpndItins<
  IIC_SSE_MPSADBW_RR, IIC_SSE_MPSADBW_RM
>;

let Sched = WriteVecIMul in
def SSE_PMULLD_ITINS : OpndItins<
  IIC_SSE_PMULLD_RR, IIC_SSE_PMULLD_RM
>;

// Definitions for backward compatibility.
// The instructions mapped onto these definitions use a different itinerary
// than the actual scheduling model.
let Sched = WriteShuffle in
def DEFAULT_ITINS_SHUFFLESCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteVecIMul in
def DEFAULT_ITINS_VECIMULSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteShuffle in
def SSE_INTALU_ITINS_SHUFF_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

let Sched = WriteMPSAD in
def DEFAULT_ITINS_MPSADSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def DEFAULT_ITINS_FBLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteBlend in
def DEFAULT_ITINS_BLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteVarBlend in
def DEFAULT_ITINS_VARBLENDSCHED : OpndItins<
  IIC_ALU_NONMEM, IIC_ALU_MEM
>;

let Sched = WriteFBlend in
def SSE_INTALU_ITINS_FBLEND_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

let Sched = WriteBlend in
def SSE_INTALU_ITINS_BLEND_P : OpndItins<
  IIC_SSE_INTALU_P_RR, IIC_SSE_INTALU_P_RM
>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 Instructions Classes
//===----------------------------------------------------------------------===//

/// sse12_fp_scalar - SSE 1 & 2 scalar instructions class
multiclass sse12_fp_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, X86MemOperand x86memop,
                           OpndItins itins,
                           bit Is2Addr = 1> {
  let isCommutable = 1 in {
    def rr : SI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (OpNode RC:$src1, RC:$src2))], itins.rr>,
                Sched<[itins.Sched]>;
  }
  def rm : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              [(set RC:$dst, (OpNode RC:$src1, (load addr:$src2)))], itins.rm>,
              Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_scalar_int - SSE 1 & 2 scalar instructions intrinsics class
multiclass sse12_fp_scalar_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
                               string asm, string SSEVer, string FPSizeStr,
                               Operand memopr, ComplexPattern mem_cpat,
                               OpndItins itins,
                               bit Is2Addr = 1> {
let isCodeGenOnly = 1 in {
  def rr_Int : SI<opc,
                  MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                  !if(Is2Addr,
                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  [(set RC:$dst, (!cast<Intrinsic>(
                     !strconcat("int_x86_sse", SSEVer, "_", OpcodeStr, FPSizeStr))
                     RC:$src1, RC:$src2))], itins.rr>,
                  Sched<[itins.Sched]>;
  def rm_Int : SI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, memopr:$src2),
                  !if(Is2Addr,
                      !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"),
                      !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                  [(set RC:$dst, (!cast<Intrinsic>(!strconcat("int_x86_sse",
                     SSEVer, "_", OpcodeStr, FPSizeStr))
                     RC:$src1, mem_cpat:$src2))], itins.rm>,
                  Sched<[itins.Sched.Folded, ReadAfterLd]>;
}
}

/// sse12_fp_packed - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
                           RegisterClass RC, ValueType vt,
                           X86MemOperand x86memop, PatFrag mem_frag,
                           Domain d, OpndItins itins, bit Is2Addr = 1> {
  let isCommutable = 1 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], itins.rr, d>,
                Sched<[itins.Sched]>;
  let mayLoad = 1 in
    def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                [(set RC:$dst, (OpNode RC:$src1, (mem_frag addr:$src2)))],
                itins.rm, d>,
                Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

/// sse12_fp_packed_logical_rm - SSE 1 & 2 packed instructions class
multiclass sse12_fp_packed_logical_rm<bits<8> opc, RegisterClass RC, Domain d,
                                      string OpcodeStr, X86MemOperand x86memop,
                                      list<dag> pat_rr, list<dag> pat_rm,
                                      bit Is2Addr = 1> {
  let isCommutable = 1, hasSideEffects = 0 in
    def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2),
                !if(Is2Addr,
                    !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                    !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                pat_rr, NoItinerary, d>,
                Sched<[WriteVecLogic]>;
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
              !if(Is2Addr,
                  !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                  !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
              pat_rm, NoItinerary, d>,
              Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

//===----------------------------------------------------------------------===//
// Non-instruction patterns
//===----------------------------------------------------------------------===//

// A vector extract of the first f32/f64 position is a subregister copy.
def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32)>;
def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
          (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64)>;

// A 128-bit subvector extract from the first 256-bit vector position
// is a subregister copy that needs no instruction.
def : Pat<(v4i32 (extract_subvector (v8i32 VR256:$src), (iPTR 0))),
          (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), sub_xmm))>;
def : Pat<(v4f32 (extract_subvector (v8f32 VR256:$src), (iPTR 0))),
          (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm))>;

def : Pat<(v2i64 (extract_subvector (v4i64 VR256:$src), (iPTR 0))),
          (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), sub_xmm))>;
def : Pat<(v2f64 (extract_subvector (v4f64 VR256:$src), (iPTR 0))),
          (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src), sub_xmm))>;

def : Pat<(v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))),
          (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), sub_xmm))>;
def : Pat<(v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))),
          (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), sub_xmm))>;

// A 128-bit subvector insert to the first 256-bit vector position
// is a subregister copy that needs no instruction.
let AddedComplexity = 25 in { // to give priority over vinsertf128rm
def : Pat<(insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v8i16 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v16i16 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
def : Pat<(insert_subvector undef, (v16i8 VR128:$src), (iPTR 0)),
          (INSERT_SUBREG (v32i8 (IMPLICIT_DEF)), VR128:$src, sub_xmm)>;
}

// Implicitly promote a 32-bit scalar to a vector.
def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
def : Pat<(v8f32 (scalar_to_vector FR32:$src)),
          (COPY_TO_REGCLASS FR32:$src, VR128)>;
// Implicitly promote a 64-bit scalar to a vector.
def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;
def : Pat<(v4f64 (scalar_to_vector FR64:$src)),
          (COPY_TO_REGCLASS FR64:$src, VR128)>;

// Bitcasts between 128-bit vector types. Return the original type since
// no instruction is needed for the conversion.
let Predicates = [HasSSE2] in {
  def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
  def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
}

// Bitcasts between 256-bit vector types. Return the original type since
// no instruction is needed for the conversion.
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (bitconvert (v8f32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v8i32 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v4i64 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v16i16 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v4f64 (bitconvert (v32i8 VR256:$src))), (v4f64 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v8i32 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4i64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v4f64 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v32i8 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v8f32 (bitconvert (v16i16 VR256:$src))), (v8f32 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8f32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v8i32 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v4f64 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v32i8 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v4i64 (bitconvert (v16i16 VR256:$src))), (v4i64 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4f64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v4i64 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8f32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v8i32 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v32i8 (bitconvert (v16i16 VR256:$src))), (v32i8 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v32i8 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v16i16 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v8f32 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4i64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v8i32 (bitconvert (v4f64 VR256:$src))), (v8i32 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8f32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v8i32 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4i64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v4f64 VR256:$src))), (v16i16 VR256:$src)>;
  def : Pat<(v16i16 (bitconvert (v32i8 VR256:$src))), (v16i16 VR256:$src)>;
}

// Alias instructions that map fld0 to xorps for sse or vxorps for avx.
// This is expanded by ExpandPostRAPseudos.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def FsFLD0SS : I<0, Pseudo, (outs FR32:$dst), (ins), "",
                   [(set FR32:$dst, fp32imm0)]>, Requires<[HasSSE1]>;
  def FsFLD0SD : I<0, Pseudo, (outs FR64:$dst), (ins), "",
                   [(set FR64:$dst, fpimm0)]>, Requires<[HasSSE2]>;
}

//===----------------------------------------------------------------------===//
// AVX & SSE - Zero/One Vectors
//===----------------------------------------------------------------------===//

// Alias instruction that maps zero vector to pxor / xorp* for sse.
// This is expanded by ExpandPostRAPseudos to an xorps / vxorps, and then
// swizzled by ExecutionDepsFix to pxor.
// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-zeros value if folding it would be beneficial.
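// Marking the pseudo isReMaterializable and isAsCheapAsAMove also lets the
// register allocator re-emit the zeroing idiom at each use point instead of
// spilling and reloading the zero vector.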
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
def V_SET0 : I<0, Pseudo, (outs VR128:$dst), (ins), "",
               [(set VR128:$dst, (v4f32 immAllZerosV))]>;
}

def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
def : Pat<(v4i32 immAllZerosV), (V_SET0)>;
def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
def : Pat<(v16i8 immAllZerosV), (V_SET0)>;


// The same as done above but for AVX. The 256-bit AVX1 ISA doesn't support PI,
// and doesn't need it because on Sandy Bridge the register is set to zero
// at the rename stage without using any execution unit, so SET0PSY
// and SET0PDY can be used for vector int instructions without penalty.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, Predicates = [HasAVX], SchedRW = [WriteZero] in {
def AVX_SET0 : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                 [(set VR256:$dst, (v8f32 immAllZerosV))]>;
}

let Predicates = [HasAVX] in
  def : Pat<(v4f64 immAllZerosV), (AVX_SET0)>;

let Predicates = [HasAVX2] in {
  def : Pat<(v4i64 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v8i32 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v16i16 immAllZerosV), (AVX_SET0)>;
  def : Pat<(v32i8 immAllZerosV), (AVX_SET0)>;
}

// AVX1 has no support for 256-bit integer instructions, but since the 128-bit
// VPXOR instruction writes zero to its upper part, it's safe to build zeros.
let Predicates = [HasAVX1Only] in {
def : Pat<(v32i8 immAllZerosV), (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v32i8 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i8 0), (V_SET0), sub_xmm)>;

def : Pat<(v16i16 immAllZerosV), (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v16i16 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i16 0), (V_SET0), sub_xmm)>;

def : Pat<(v8i32 immAllZerosV), (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v8i32 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i32 0), (V_SET0), sub_xmm)>;

def : Pat<(v4i64 immAllZerosV), (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
def : Pat<(bc_v4i64 (v8f32 immAllZerosV)),
          (SUBREG_TO_REG (i64 0), (V_SET0), sub_xmm)>;
}

// We set canFoldAsLoad because this can be converted to a constant-pool
// load of an all-ones value if folding it would be beneficial.
let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
    isPseudo = 1, SchedRW = [WriteZero] in {
  def V_SETALLONES : I<0, Pseudo, (outs VR128:$dst), (ins), "",
                       [(set VR128:$dst, (v4i32 immAllOnesV))]>;
  let Predicates = [HasAVX2] in
  def AVX2_SETALLONES : I<0, Pseudo, (outs VR256:$dst), (ins), "",
                          [(set VR256:$dst, (v8i32 immAllOnesV))]>;
}


//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move FP Scalar Instructions
//
// Move Instructions. Register-to-register movss/movsd is not used for FR32/64
// register copies because it's a partial register update; Register-to-register
// movss/movsd is not modeled as an INSERT_SUBREG because INSERT_SUBREG requires
// that the insert be implementable in terms of a copy, and, as just mentioned,
// we don't use movss/movsd for copies.
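// (A reg-reg movss/movsd writes only the low lane of the destination and
// leaves the upper bits untouched, so it carries a false dependence on the
// destination register's previous value.)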
//===----------------------------------------------------------------------===//

multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
                         X86MemOperand x86memop, string base_opc,
                         string asm_opr, Domain d = GenericDomain> {
  def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
              (ins VR128:$src1, RC:$src2),
              !strconcat(base_opc, asm_opr),
              [(set VR128:$dst, (vt (OpNode VR128:$src1,
                                     (scalar_to_vector RC:$src2))))],
              IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;

  // For the disassembler
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
                  (ins VR128:$src1, RC:$src2),
                  !strconcat(base_opc, asm_opr),
                  [], IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>;
}

multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                      X86MemOperand x86memop, string OpcodeStr,
                      Domain d = GenericDomain> {
  // AVX
  defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d>,
                              VEX_4V, VEX_LIG;

  def V#NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
                     VEX, VEX_LIG, Sched<[WriteStore]>;
  // SSE1 & 2
  let Constraints = "$src1 = $dst" in {
    defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
                              "\t{$src2, $dst|$dst, $src2}", d>;
  }

  def NAME#mr : SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(store RC:$src, addr:$dst)], IIC_SSE_MOV_S_MR, d>,
                   Sched<[WriteStore]>;
}

// Loading from memory automatically zeroes the upper bits.
multiclass sse12_move_rm<RegisterClass RC, X86MemOperand x86memop,
                         PatFrag mem_pat, string OpcodeStr,
                         Domain d = GenericDomain> {
  def V#NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                     !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                     [(set RC:$dst, (mem_pat addr:$src))],
                     IIC_SSE_MOV_S_RM, d>, VEX, VEX_LIG, Sched<[WriteLoad]>;
  def NAME#rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set RC:$dst, (mem_pat addr:$src))],
                   IIC_SSE_MOV_S_RM, d>, Sched<[WriteLoad]>;
}

defm MOVSS : sse12_move<FR32, X86Movss, v4f32, f32mem, "movss",
                        SSEPackedSingle>, XS;
defm MOVSD : sse12_move<FR64, X86Movsd, v2f64, f64mem, "movsd",
                        SSEPackedDouble>, XD;

let canFoldAsLoad = 1, isReMaterializable = 1 in {
  defm MOVSS : sse12_move_rm<FR32, f32mem, loadf32, "movss",
                             SSEPackedSingle>, XS;

  let AddedComplexity = 20 in
    defm MOVSD : sse12_move_rm<FR64, f64mem, loadf64, "movsd",
                               SSEPackedDouble>, XD;
}

// Patterns
let Predicates = [UseAVX] in {
  let AddedComplexity = 20 in {
    // MOVSSrm zeros the high parts of the register; represent this
    // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSSrm addr:$src), VR128)>;

    // MOVSDrm zeros the high parts of the register; represent this
    // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (VMOVSDrm addr:$src), VR128)>;

    // Represent the same patterns above but in the form they appear for
    // 256-bit types.
    def : Pat<(v8f32 (X86vzmovl (insert_subvector undef,
                     (v4f32 (scalar_to_vector (loadf32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSSrm addr:$src), sub_xmm)>;
    def : Pat<(v4f64 (X86vzmovl (insert_subvector undef,
                     (v2f64 (scalar_to_vector (loadf64 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSSmr addr:$dst, (COPY_TO_REGCLASS (v4f32 VR128:$src), FR32))>;
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (VMOVSDmr addr:$dst, (COPY_TO_REGCLASS (v2f64 VR128:$src), FR64))>;

  // Shuffle with VMOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4i32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (VMOVSSrr (v4f32 VR128:$src1),
                      (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;

  // 256-bit variants
  def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_xmm)),
              sub_xmm)>;

  // Shuffle with VMOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;

  // 256-bit variants
  def : Pat<(v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_xmm)),
              sub_xmm)>;
  def : Pat<(v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
            (SUBREG_TO_REG (i32 0),
              (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_xmm),
                        (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_xmm)),
              sub_xmm)>;

  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
  // is during lowering, where it's not possible to recognize the fold because
  // it has two uses through a bitcast. One use disappears at isel time and the
  // fold opportunity reappears.
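  // (These reg-reg movlp{s,d} shuffle nodes perform the same low-half insert
  // that a reg-reg movsd does, which is why they can be matched to VMOVSDrr
  // below.)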
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}

let Predicates = [UseSSE1] in {
  let Predicates = [NoSSE41], AddedComplexity = 15 in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVSS to the lower bits.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
              (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
    def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
              (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
    def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
              (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
  }

  let AddedComplexity = 20 in {
    // MOVSSrm already zeros the high parts of the register.
    def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
              (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
    def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
              (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
    def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
              (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
  }

  // Extract and store.
  def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSSmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR32))>;

  // Shuffle with MOVSS
  def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
}

let Predicates = [UseSSE2] in {
  let Predicates = [NoSSE41], AddedComplexity = 15 in {
    // Move scalar to XMM zero-extended, zeroing a VR128 then do a
    // MOVSD to the lower bits.
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
              (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
  }

  let AddedComplexity = 20 in {
    // MOVSDrm already zeros the high parts of the register.
    def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
              (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
              (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
              (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
              (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
    def : Pat<(v2f64 (X86vzload addr:$src)),
              (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
  }

  // Extract and store.
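  // Storing lane 0 of a v2f64 only needs a scalar movsd store of the FR64
  // subregister copy; no separate extract instruction is emitted.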
  def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
                   addr:$dst),
            (MOVSDmr addr:$dst, (COPY_TO_REGCLASS VR128:$src, FR64))>;

  // Shuffle with MOVSD
  def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movsd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;

  // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
  // is during lowering, where it's not possible to recognize the fold because
  // it has two uses through a bitcast. One use disappears at isel time and the
  // fold opportunity reappears.
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Aligned/Unaligned FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_packed<bits<8> opc, RegisterClass RC,
                            X86MemOperand x86memop, PatFrag ld_frag,
                            string asm, Domain d,
                            OpndItins itins,
                            bit IsReMaterializable = 1> {
let hasSideEffects = 0 in
  def rr : PI<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"), [], itins.rr, d>,
              Sched<[WriteFShuffle]>;
let canFoldAsLoad = 1, isReMaterializable = IsReMaterializable in
  def rm : PI<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set RC:$dst, (ld_frag addr:$src))], itins.rm, d>,
              Sched<[WriteLoad]>;
}

let Predicates = [HasAVX, NoVLX] in {
defm VMOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                                "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                                PS, VEX;
defm VMOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                                "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                                PD, VEX;
defm VMOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                                "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                                PS, VEX;
defm VMOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                                "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                                PD, VEX;

defm VMOVAPSY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv8f32,
                                 "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                                 PS, VEX, VEX_L;
defm VMOVAPDY : sse12_mov_packed<0x28, VR256, f256mem, alignedloadv4f64,
                                 "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                                 PD, VEX, VEX_L;
defm VMOVUPSY : sse12_mov_packed<0x10, VR256, f256mem, loadv8f32,
                                 "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                                 PS, VEX, VEX_L;
defm VMOVUPDY : sse12_mov_packed<0x10, VR256, f256mem, loadv4f64,
                                 "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                                 PD, VEX, VEX_L;
}

let Predicates = [UseSSE1] in {
defm MOVAPS : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv4f32,
                               "movaps", SSEPackedSingle, SSE_MOVA_ITINS>,
                               PS;
defm MOVUPS : sse12_mov_packed<0x10, VR128, f128mem, loadv4f32,
                               "movups", SSEPackedSingle, SSE_MOVU_ITINS>,
                               PS;
}
let Predicates = [UseSSE2] in {
defm MOVAPD : sse12_mov_packed<0x28, VR128, f128mem, alignedloadv2f64,
                               "movapd", SSEPackedDouble, SSE_MOVA_ITINS>,
                               PD;
defm MOVUPD : sse12_mov_packed<0x10, VR128, f128mem, loadv2f64,
                               "movupd", SSEPackedDouble, SSE_MOVU_ITINS, 0>,
                               PD;
}

let SchedRW = [WriteStore], Predicates = [HasAVX, NoVLX] in {
def VMOVAPSmr : VPSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movaps\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                     IIC_SSE_MOVA_P_MR>, VEX;
def VMOVAPDmr : VPDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movapd\t{$src, $dst|$dst, $src}",
                     [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                     IIC_SSE_MOVA_P_MR>, VEX;
def VMOVUPSmr : VPSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movups\t{$src, $dst|$dst, $src}",
                     [(store (v4f32 VR128:$src), addr:$dst)],
                     IIC_SSE_MOVU_P_MR>, VEX;
def VMOVUPDmr : VPDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                     "movupd\t{$src, $dst|$dst, $src}",
                     [(store (v2f64 VR128:$src), addr:$dst)],
                     IIC_SSE_MOVU_P_MR>, VEX;
def VMOVAPSYmr : VPSI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movaps\t{$src, $dst|$dst, $src}",
                      [(alignedstore256 (v8f32 VR256:$src), addr:$dst)],
                      IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVAPDYmr : VPDI<0x29, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movapd\t{$src, $dst|$dst, $src}",
                      [(alignedstore256 (v4f64 VR256:$src), addr:$dst)],
                      IIC_SSE_MOVA_P_MR>, VEX, VEX_L;
def VMOVUPSYmr : VPSI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movups\t{$src, $dst|$dst, $src}",
                      [(store (v8f32 VR256:$src), addr:$dst)],
                      IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
def VMOVUPDYmr : VPDI<0x11, MRMDestMem, (outs), (ins f256mem:$dst, VR256:$src),
                      "movupd\t{$src, $dst|$dst, $src}",
                      [(store (v4f64 VR256:$src), addr:$dst)],
                      IIC_SSE_MOVU_P_MR>, VEX, VEX_L;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteFShuffle] in {
  def VMOVAPSrr_REV : VPSI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movaps\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVAPDrr_REV : VPDI<0x29, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movapd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVA_P_RR>, VEX;
  def VMOVUPSrr_REV : VPSI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movups\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVUPDrr_REV : VPDI<0x11, MRMDestReg, (outs VR128:$dst),
                           (ins VR128:$src),
                           "movupd\t{$src, $dst|$dst, $src}", [],
                           IIC_SSE_MOVU_P_RR>, VEX;
  def VMOVAPSYrr_REV : VPSI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movaps\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
  def VMOVAPDYrr_REV : VPDI<0x29, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movapd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
  def VMOVUPSYrr_REV : VPSI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movups\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
  def VMOVUPDYrr_REV : VPDI<0x11, MRMDestReg, (outs VR256:$dst),
                            (ins VR256:$src),
                            "movupd\t{$src, $dst|$dst, $src}", [],
                            IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}

let Predicates = [HasAVX] in {
def : Pat<(v8i32 (X86vzmovl
                  (insert_subvector undef, (v4i32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4i64 (X86vzmovl
                  (insert_subvector undef, (v2i64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v8f32 (X86vzmovl
                  (insert_subvector undef, (v4f32 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzmovl
                  (insert_subvector undef, (v2f64 VR128:$src), (iPTR 0)))),
          (SUBREG_TO_REG (i32 0), (VMOVAPSrr VR128:$src), sub_xmm)>;
}


def : Pat<(int_x86_avx_storeu_ps_256 addr:$dst, VR256:$src),
          (VMOVUPSYmr addr:$dst, VR256:$src)>;
def : Pat<(int_x86_avx_storeu_pd_256 addr:$dst, VR256:$src),
          (VMOVUPDYmr addr:$dst, VR256:$src)>;

let SchedRW = [WriteStore] in {
def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movaps\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVAPDmr : PDI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movapd\t{$src, $dst|$dst, $src}",
                   [(alignedstore (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVA_P_MR>;
def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movups\t{$src, $dst|$dst, $src}",
                   [(store (v4f32 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
def MOVUPDmr : PDI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
                   "movupd\t{$src, $dst|$dst, $src}",
                   [(store (v2f64 VR128:$src), addr:$dst)],
                   IIC_SSE_MOVU_P_MR>;
} // SchedRW

// For disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteFShuffle] in {
  def MOVAPSrr_REV : PSI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movaps\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVAPDrr_REV : PDI<0x29, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movapd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVA_P_RR>;
  def MOVUPSrr_REV : PSI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movups\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
  def MOVUPDrr_REV : PDI<0x11, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                         "movupd\t{$src, $dst|$dst, $src}", [],
                         IIC_SSE_MOVU_P_RR>;
}

let Predicates = [HasAVX] in {
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (VMOVUPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in
  def : Pat<(int_x86_sse_storeu_ps addr:$dst, VR128:$src),
            (MOVUPSmr addr:$dst, VR128:$src)>;
let Predicates = [UseSSE2] in
  def : Pat<(int_x86_sse2_storeu_pd addr:$dst, VR128:$src),
            (MOVUPDmr addr:$dst, VR128:$src)>;

// Use vmovaps/vmovups for AVX integer load/store.
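// (The FP-domain moves are selected for integer vectors here; when the
// surrounding code is in the integer domain, the execution-domain fixup pass
// can rewrite them to vmovdqa/vmovdqu.)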
let Predicates = [HasAVX, NoVLX] in {
  // 128-bit load/store
  def : Pat<(alignedloadv2i64 addr:$src),
            (VMOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (VMOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (VMOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (VMOVUPSmr addr:$dst, VR128:$src)>;

  // 256-bit load/store
  def : Pat<(alignedloadv4i64 addr:$src),
            (VMOVAPSYrm addr:$src)>;
  def : Pat<(loadv4i64 addr:$src),
            (VMOVUPSYrm addr:$src)>;
  def : Pat<(alignedstore256 (v4i64 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v8i32 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v16i16 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(alignedstore256 (v32i8 VR256:$src), addr:$dst),
            (VMOVAPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v4i64 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v8i32 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v16i16 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;
  def : Pat<(store (v32i8 VR256:$src), addr:$dst),
            (VMOVUPSYmr addr:$dst, VR256:$src)>;

  // Special patterns for storing subvector extracts of lower 128-bits.
  // It's cheaper to just use VMOVAPS/VMOVUPS instead of VEXTRACTF128mr.
  def : Pat<(alignedstore (v2f64 (extract_subvector
                                  (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4f32 (extract_subvector
                                  (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v2i64 (extract_subvector
                                  (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v4i32 (extract_subvector
                                  (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v8i16 (extract_subvector
                                  (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(alignedstore (v16i8 (extract_subvector
                                  (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVAPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;

  def : Pat<(store (v2f64 (extract_subvector
                           (v4f64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2f64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4f32 (extract_subvector
                           (v8f32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4f32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v2i64 (extract_subvector
                           (v4i64 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPDmr addr:$dst, (v2i64 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v4i32 (extract_subvector
                           (v8i32 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v8i16 (extract_subvector
                           (v16i16 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
  def : Pat<(store (v16i8 (extract_subvector
                           (v32i8 VR256:$src), (iPTR 0))), addr:$dst),
            (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>;
}

// Use movaps / movups for SSE integer load / store (one byte shorter).
// The instructions selected below are then converted to MOVDQA/MOVDQU
// during the SSE domain pass.
let Predicates = [UseSSE1] in {
  def : Pat<(alignedloadv2i64 addr:$src),
            (MOVAPSrm addr:$src)>;
  def : Pat<(loadv2i64 addr:$src),
            (MOVUPSrm addr:$src)>;

  def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
            (MOVAPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v2i64 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v4i32 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v8i16 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
  def : Pat<(store (v16i8 VR128:$src), addr:$dst),
            (MOVUPSmr addr:$dst, VR128:$src)>;
}

// Alias instruction to load FR32 or FR64 from f128mem using movaps. Upper
// bits are disregarded. FIXME: Set encoding to pseudo!
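// (Loading the full 16 bytes is safe: the alignedload fragments require
// 16-byte alignment, so the wide load stays within a single page and cannot
// fault anywhere the scalar load itself would not.)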
let canFoldAsLoad = 1, isReMaterializable = 1, SchedRW = [WriteLoad] in {
let isCodeGenOnly = 1 in {
  def FsVMOVAPSrm : VPSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                         "movaps\t{$src, $dst|$dst, $src}",
                         [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsVMOVAPDrm : VPDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                         "movapd\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                         IIC_SSE_MOVA_P_RM>, VEX;
  def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
                       "movaps\t{$src, $dst|$dst, $src}",
                       [(set FR32:$dst, (alignedloadfsf32 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
  def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
                       "movapd\t{$src, $dst|$dst, $src}",
                       [(set FR64:$dst, (alignedloadfsf64 addr:$src))],
                       IIC_SSE_MOVA_P_RM>;
}
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Low packed FP Instructions
//===----------------------------------------------------------------------===//

multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
                                      string base_opc, string asm_opr,
                                      InstrItinClass itin> {
  def PSrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "s", asm_opr),
                [(set VR128:$dst,
                  (psnode VR128:$src1,
                    (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))],
                itin, SSEPackedSingle>, PS,
                Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def PDrm : PI<opc, MRMSrcMem,
                (outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
                !strconcat(base_opc, "d", asm_opr),
                [(set VR128:$dst, (v2f64 (pdnode VR128:$src1,
                                  (scalar_to_vector (loadf64 addr:$src2)))))],
                itin, SSEPackedDouble>, PD,
                Sched<[WriteFShuffleLd, ReadAfterLd]>;

}

multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
                                 string base_opc, InstrItinClass itin> {
  defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                            itin>, VEX_4V;

let Constraints = "$src1 = $dst" in
  defm NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
                            "\t{$src2, $dst|$dst, $src2}",
                            itin>;
}

let AddedComplexity = 20 in {
  defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp",
                                    IIC_SSE_MOV_LH>;
}

let SchedRW = [WriteStore] in {
def VMOVLPSmr : VPSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def VMOVLPDmr : VPDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movlpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract (v2f64 VR128:$src),
                                   (iPTR 0))), addr:$dst)],
                     IIC_SSE_MOV_LH>, VEX;
def MOVLPSmr : PSI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (bc_v2f64 (v4f32 VR128:$src)),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movlpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract (v2f64 VR128:$src),
                                 (iPTR 0))), addr:$dst)],
                   IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // Shuffle with VMOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (VMOVLPSrm VR128:$src1, addr:$src2)>;

  // Shuffle with VMOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                    (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (VMOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                           (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                   addr:$src1),
            (VMOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (VMOVLPDmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE1] in {
  // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
  def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)),
                         (iPTR 0))), addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;

  // Shuffle with MOVLPS
  def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(v4i32 (X86Movlps VR128:$src1, (load addr:$src2))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVLPSrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v4i32 (X86Movlps
                           (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
                   addr:$src1),
            (MOVLPSmr addr:$src1, VR128:$src2)>;
}

let Predicates = [UseSSE2] in {
  // Shuffle with MOVLPD
  def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2i64 (X86Movlpd VR128:$src1, (load addr:$src2))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;
  def : Pat<(v2f64 (X86Movsd VR128:$src1,
                    (v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
            (MOVLPDrm VR128:$src1, addr:$src2)>;

  // Store patterns
  def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
  def : Pat<(store (v2i64 (X86Movlpd (load addr:$src1), VR128:$src2)),
                   addr:$src1),
            (MOVLPDmr addr:$src1, VR128:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Move Hi packed FP Instructions
//===----------------------------------------------------------------------===//

let AddedComplexity = 20 in {
  defm MOVH : sse12_mov_hilo_packed<0x16, X86Movlhps, X86Movlhpd, "movhp",
                                    IIC_SSE_MOV_LH>;
}

let SchedRW = [WriteStore] in {
// v2f64 extract element 1 is always custom lowered to unpack high to low
// and extract element 0 so the non-store version isn't too horrible.
def VMOVHPSmr : VPSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhps\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                              (bc_v2f64 (v4f32 VR128:$src))),
                                   (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def VMOVHPDmr : VPDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                     "movhpd\t{$src, $dst|$dst, $src}",
                     [(store (f64 (vector_extract
                                   (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                   (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>, VEX;
def MOVHPSmr : PSI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhps\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (X86Unpckh (bc_v2f64 (v4f32 VR128:$src)),
                                            (bc_v2f64 (v4f32 VR128:$src))),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
def MOVHPDmr : PDI<0x17, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
                   "movhpd\t{$src, $dst|$dst, $src}",
                   [(store (f64 (vector_extract
                                 (v2f64 (X86Unpckh VR128:$src, VR128:$src)),
                                 (iPTR 0))), addr:$dst)], IIC_SSE_MOV_LH>;
} // SchedRW

let Predicates = [HasAVX] in {
  // VMOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4i32 (v2i64 (X86vzload addr:$src2)))),
            (VMOVHPSrm VR128:$src1, addr:$src2)>;

  // VMOVHPD patterns

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // because it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                    (scalar_to_vector (loadf64 addr:$src2)))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                    (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
            (VMOVHPDrm VR128:$src1, addr:$src2)>;

  def : Pat<(store (f64 (vector_extract
                         (v2f64 (X86VPermilpi VR128:$src, (i8 1))),
                         (iPTR 0))), addr:$dst),
            (VMOVHPDmr addr:$dst, VR128:$src)>;
}

let Predicates = [UseSSE1] in {
  // MOVHPS patterns
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
  def : Pat<(X86Movlhps VR128:$src1,
                 (bc_v4f32 (v2i64 (X86vzload addr:$src2)))),
            (MOVHPSrm VR128:$src1, addr:$src2)>;
}

let Predicates = [UseSSE2] in {
  // MOVHPD patterns

  // FIXME: Instead of X86Unpckl, there should be a X86Movlhpd here, the problem
  // is during lowering, where it's not possible to recognize the load fold
  // because it has two uses through a bitcast. One use disappears at isel time
  // and the fold opportunity reappears.
  def : Pat<(v2f64 (X86Unpckl VR128:$src1,
                    (scalar_to_vector (loadf64 addr:$src2)))),
            (MOVHPDrm VR128:$src1, addr:$src2)>;
  // Also handle an i64 load because that may get selected as a faster way to
  // load the data.
1378 def : Pat<(v2f64 (X86Unpckl VR128:$src1, 1379 (bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))), 1380 (MOVHPDrm VR128:$src1, addr:$src2)>; 1381 1382 def : Pat<(store (f64 (vector_extract 1383 (v2f64 (X86Shufp VR128:$src, VR128:$src, (i8 1))), 1384 (iPTR 0))), addr:$dst), 1385 (MOVHPDmr addr:$dst, VR128:$src)>; 1386} 1387 1388//===----------------------------------------------------------------------===// 1389// SSE 1 & 2 - Move Low to High and High to Low packed FP Instructions 1390//===----------------------------------------------------------------------===// 1391 1392let AddedComplexity = 20, Predicates = [UseAVX] in { 1393 def VMOVLHPSrr : VPSI<0x16, MRMSrcReg, (outs VR128:$dst), 1394 (ins VR128:$src1, VR128:$src2), 1395 "movlhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1396 [(set VR128:$dst, 1397 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], 1398 IIC_SSE_MOV_LH>, 1399 VEX_4V, Sched<[WriteFShuffle]>; 1400 def VMOVHLPSrr : VPSI<0x12, MRMSrcReg, (outs VR128:$dst), 1401 (ins VR128:$src1, VR128:$src2), 1402 "movhlps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1403 [(set VR128:$dst, 1404 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], 1405 IIC_SSE_MOV_LH>, 1406 VEX_4V, Sched<[WriteFShuffle]>; 1407} 1408let Constraints = "$src1 = $dst", AddedComplexity = 20 in { 1409 def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst), 1410 (ins VR128:$src1, VR128:$src2), 1411 "movlhps\t{$src2, $dst|$dst, $src2}", 1412 [(set VR128:$dst, 1413 (v4f32 (X86Movlhps VR128:$src1, VR128:$src2)))], 1414 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; 1415 def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst), 1416 (ins VR128:$src1, VR128:$src2), 1417 "movhlps\t{$src2, $dst|$dst, $src2}", 1418 [(set VR128:$dst, 1419 (v4f32 (X86Movhlps VR128:$src1, VR128:$src2)))], 1420 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; 1421} 1422 1423let Predicates = [UseAVX] in { 1424 // MOVLHPS patterns 1425 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), 1426 (VMOVLHPSrr VR128:$src1, VR128:$src2)>; 1427 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), 1428 (VMOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; 1429 1430 // MOVHLPS patterns 1431 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), 1432 (VMOVHLPSrr VR128:$src1, VR128:$src2)>; 1433} 1434 1435let Predicates = [UseSSE1] in { 1436 // MOVLHPS patterns 1437 def : Pat<(v4i32 (X86Movlhps VR128:$src1, VR128:$src2)), 1438 (MOVLHPSrr VR128:$src1, VR128:$src2)>; 1439 def : Pat<(v2i64 (X86Movlhps VR128:$src1, VR128:$src2)), 1440 (MOVLHPSrr (v2i64 VR128:$src1), VR128:$src2)>; 1441 1442 // MOVHLPS patterns 1443 def : Pat<(v4i32 (X86Movhlps VR128:$src1, VR128:$src2)), 1444 (MOVHLPSrr VR128:$src1, VR128:$src2)>; 1445} 1446 1447//===----------------------------------------------------------------------===// 1448// SSE 1 & 2 - Conversion Instructions 1449//===----------------------------------------------------------------------===// 1450 1451def SSE_CVT_PD : OpndItins< 1452 IIC_SSE_CVT_PD_RR, IIC_SSE_CVT_PD_RM 1453>; 1454 1455let Sched = WriteCvtI2F in 1456def SSE_CVT_PS : OpndItins< 1457 IIC_SSE_CVT_PS_RR, IIC_SSE_CVT_PS_RM 1458>; 1459 1460let Sched = WriteCvtI2F in 1461def SSE_CVT_Scalar : OpndItins< 1462 IIC_SSE_CVT_Scalar_RR, IIC_SSE_CVT_Scalar_RM 1463>; 1464 1465let Sched = WriteCvtF2I in 1466def SSE_CVT_SS2SI_32 : OpndItins< 1467 IIC_SSE_CVT_SS2SI32_RR, IIC_SSE_CVT_SS2SI32_RM 1468>; 1469 1470let Sched = WriteCvtF2I in 1471def SSE_CVT_SS2SI_64 : OpndItins< 1472 IIC_SSE_CVT_SS2SI64_RR, IIC_SSE_CVT_SS2SI64_RM 1473>; 1474 1475let Sched = WriteCvtF2I in 1476def 
SSE_CVT_SD2SI : OpndItins< 1477 IIC_SSE_CVT_SD2SI_RR, IIC_SSE_CVT_SD2SI_RM 1478>; 1479 1480multiclass sse12_cvt_s<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, 1481 SDNode OpNode, X86MemOperand x86memop, PatFrag ld_frag, 1482 string asm, OpndItins itins> { 1483 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, 1484 [(set DstRC:$dst, (OpNode SrcRC:$src))], 1485 itins.rr>, Sched<[itins.Sched]>; 1486 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, 1487 [(set DstRC:$dst, (OpNode (ld_frag addr:$src)))], 1488 itins.rm>, Sched<[itins.Sched.Folded]>; 1489} 1490 1491multiclass sse12_cvt_p<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, 1492 X86MemOperand x86memop, string asm, Domain d, 1493 OpndItins itins> { 1494let hasSideEffects = 0 in { 1495 def rr : I<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), asm, 1496 [], itins.rr, d>, Sched<[itins.Sched]>; 1497 let mayLoad = 1 in 1498 def rm : I<opc, MRMSrcMem, (outs DstRC:$dst), (ins x86memop:$src), asm, 1499 [], itins.rm, d>, Sched<[itins.Sched.Folded]>; 1500} 1501} 1502 1503multiclass sse12_vcvt_avx<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, 1504 X86MemOperand x86memop, string asm> { 1505let hasSideEffects = 0, Predicates = [UseAVX] in { 1506 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src), 1507 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 1508 Sched<[WriteCvtI2F]>; 1509 let mayLoad = 1 in 1510 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), 1511 (ins DstRC:$src1, x86memop:$src), 1512 !strconcat(asm,"\t{$src, $src1, $dst|$dst, $src1, $src}"), []>, 1513 Sched<[WriteCvtI2FLd, ReadAfterLd]>; 1514} // hasSideEffects = 0 1515} 1516 1517let Predicates = [UseAVX] in { 1518defm VCVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, 1519 "cvttss2si\t{$src, $dst|$dst, $src}", 1520 SSE_CVT_SS2SI_32>, 1521 XS, VEX, VEX_LIG; 1522defm VCVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, 1523 "cvttss2si\t{$src, $dst|$dst, $src}", 1524 SSE_CVT_SS2SI_64>, 1525 XS, VEX, VEX_W, VEX_LIG; 1526defm VCVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 1527 "cvttsd2si\t{$src, $dst|$dst, $src}", 1528 SSE_CVT_SD2SI>, 1529 XD, VEX, VEX_LIG; 1530defm VCVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 1531 "cvttsd2si\t{$src, $dst|$dst, $src}", 1532 SSE_CVT_SD2SI>, 1533 XD, VEX, VEX_W, VEX_LIG; 1534 1535def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 1536 (VCVTTSS2SIrr GR32:$dst, FR32:$src), 0>; 1537def : InstAlias<"vcvttss2si{l}\t{$src, $dst|$dst, $src}", 1538 (VCVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; 1539def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 1540 (VCVTTSD2SIrr GR32:$dst, FR64:$src), 0>; 1541def : InstAlias<"vcvttsd2si{l}\t{$src, $dst|$dst, $src}", 1542 (VCVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; 1543def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 1544 (VCVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; 1545def : InstAlias<"vcvttss2si{q}\t{$src, $dst|$dst, $src}", 1546 (VCVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; 1547def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 1548 (VCVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; 1549def : InstAlias<"vcvttsd2si{q}\t{$src, $dst|$dst, $src}", 1550 (VCVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; 1551} 1552// The assembler can recognize rr 64-bit instructions by seeing a rxx 1553// register, but the same isn't true when only using memory operands, 1554// provide other assembly "l" and "q" forms to address this 
explicitly 1555// where appropriate to do so. 1556defm VCVTSI2SS : sse12_vcvt_avx<0x2A, GR32, FR32, i32mem, "cvtsi2ss{l}">, 1557 XS, VEX_4V, VEX_LIG; 1558defm VCVTSI2SS64 : sse12_vcvt_avx<0x2A, GR64, FR32, i64mem, "cvtsi2ss{q}">, 1559 XS, VEX_4V, VEX_W, VEX_LIG; 1560defm VCVTSI2SD : sse12_vcvt_avx<0x2A, GR32, FR64, i32mem, "cvtsi2sd{l}">, 1561 XD, VEX_4V, VEX_LIG; 1562defm VCVTSI2SD64 : sse12_vcvt_avx<0x2A, GR64, FR64, i64mem, "cvtsi2sd{q}">, 1563 XD, VEX_4V, VEX_W, VEX_LIG; 1564 1565let Predicates = [UseAVX] in { 1566 def : InstAlias<"vcvtsi2ss\t{$src, $src1, $dst|$dst, $src1, $src}", 1567 (VCVTSI2SSrm FR64:$dst, FR64:$src1, i32mem:$src), 0>; 1568 def : InstAlias<"vcvtsi2sd\t{$src, $src1, $dst|$dst, $src1, $src}", 1569 (VCVTSI2SDrm FR64:$dst, FR64:$src1, i32mem:$src), 0>; 1570 1571 def : Pat<(f32 (sint_to_fp (loadi32 addr:$src))), 1572 (VCVTSI2SSrm (f32 (IMPLICIT_DEF)), addr:$src)>; 1573 def : Pat<(f32 (sint_to_fp (loadi64 addr:$src))), 1574 (VCVTSI2SS64rm (f32 (IMPLICIT_DEF)), addr:$src)>; 1575 def : Pat<(f64 (sint_to_fp (loadi32 addr:$src))), 1576 (VCVTSI2SDrm (f64 (IMPLICIT_DEF)), addr:$src)>; 1577 def : Pat<(f64 (sint_to_fp (loadi64 addr:$src))), 1578 (VCVTSI2SD64rm (f64 (IMPLICIT_DEF)), addr:$src)>; 1579 1580 def : Pat<(f32 (sint_to_fp GR32:$src)), 1581 (VCVTSI2SSrr (f32 (IMPLICIT_DEF)), GR32:$src)>; 1582 def : Pat<(f32 (sint_to_fp GR64:$src)), 1583 (VCVTSI2SS64rr (f32 (IMPLICIT_DEF)), GR64:$src)>; 1584 def : Pat<(f64 (sint_to_fp GR32:$src)), 1585 (VCVTSI2SDrr (f64 (IMPLICIT_DEF)), GR32:$src)>; 1586 def : Pat<(f64 (sint_to_fp GR64:$src)), 1587 (VCVTSI2SD64rr (f64 (IMPLICIT_DEF)), GR64:$src)>; 1588} 1589 1590defm CVTTSS2SI : sse12_cvt_s<0x2C, FR32, GR32, fp_to_sint, f32mem, loadf32, 1591 "cvttss2si\t{$src, $dst|$dst, $src}", 1592 SSE_CVT_SS2SI_32>, XS; 1593defm CVTTSS2SI64 : sse12_cvt_s<0x2C, FR32, GR64, fp_to_sint, f32mem, loadf32, 1594 "cvttss2si\t{$src, $dst|$dst, $src}", 1595 SSE_CVT_SS2SI_64>, XS, REX_W; 1596defm CVTTSD2SI : sse12_cvt_s<0x2C, FR64, GR32, fp_to_sint, f64mem, loadf64, 1597 "cvttsd2si\t{$src, $dst|$dst, $src}", 1598 SSE_CVT_SD2SI>, XD; 1599defm CVTTSD2SI64 : sse12_cvt_s<0x2C, FR64, GR64, fp_to_sint, f64mem, loadf64, 1600 "cvttsd2si\t{$src, $dst|$dst, $src}", 1601 SSE_CVT_SD2SI>, XD, REX_W; 1602defm CVTSI2SS : sse12_cvt_s<0x2A, GR32, FR32, sint_to_fp, i32mem, loadi32, 1603 "cvtsi2ss{l}\t{$src, $dst|$dst, $src}", 1604 SSE_CVT_Scalar>, XS; 1605defm CVTSI2SS64 : sse12_cvt_s<0x2A, GR64, FR32, sint_to_fp, i64mem, loadi64, 1606 "cvtsi2ss{q}\t{$src, $dst|$dst, $src}", 1607 SSE_CVT_Scalar>, XS, REX_W; 1608defm CVTSI2SD : sse12_cvt_s<0x2A, GR32, FR64, sint_to_fp, i32mem, loadi32, 1609 "cvtsi2sd{l}\t{$src, $dst|$dst, $src}", 1610 SSE_CVT_Scalar>, XD; 1611defm CVTSI2SD64 : sse12_cvt_s<0x2A, GR64, FR64, sint_to_fp, i64mem, loadi64, 1612 "cvtsi2sd{q}\t{$src, $dst|$dst, $src}", 1613 SSE_CVT_Scalar>, XD, REX_W; 1614 1615def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 1616 (CVTTSS2SIrr GR32:$dst, FR32:$src), 0>; 1617def : InstAlias<"cvttss2si{l}\t{$src, $dst|$dst, $src}", 1618 (CVTTSS2SIrm GR32:$dst, f32mem:$src), 0>; 1619def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1620 (CVTTSD2SIrr GR32:$dst, FR64:$src), 0>; 1621def : InstAlias<"cvttsd2si{l}\t{$src, $dst|$dst, $src}", 1622 (CVTTSD2SIrm GR32:$dst, f64mem:$src), 0>; 1623def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1624 (CVTTSS2SI64rr GR64:$dst, FR32:$src), 0>; 1625def : InstAlias<"cvttss2si{q}\t{$src, $dst|$dst, $src}", 1626 (CVTTSS2SI64rm GR64:$dst, f32mem:$src), 0>; 1627def : 
InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1628 (CVTTSD2SI64rr GR64:$dst, FR64:$src), 0>; 1629def : InstAlias<"cvttsd2si{q}\t{$src, $dst|$dst, $src}", 1630 (CVTTSD2SI64rm GR64:$dst, f64mem:$src), 0>; 1631 1632def : InstAlias<"cvtsi2ss\t{$src, $dst|$dst, $src}", 1633 (CVTSI2SSrm FR64:$dst, i32mem:$src), 0>; 1634def : InstAlias<"cvtsi2sd\t{$src, $dst|$dst, $src}", 1635 (CVTSI2SDrm FR64:$dst, i32mem:$src), 0>; 1636 1637// Conversion Instructions Intrinsics - Match intrinsics which expect MM 1638// and/or XMM operand(s). 1639 1640multiclass sse12_cvt_sint<bits<8> opc, RegisterClass SrcRC, RegisterClass DstRC, 1641 Intrinsic Int, Operand memop, ComplexPattern mem_cpat, 1642 string asm, OpndItins itins> { 1643 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins SrcRC:$src), 1644 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 1645 [(set DstRC:$dst, (Int SrcRC:$src))], itins.rr>, 1646 Sched<[itins.Sched]>; 1647 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), (ins memop:$src), 1648 !strconcat(asm, "\t{$src, $dst|$dst, $src}"), 1649 [(set DstRC:$dst, (Int mem_cpat:$src))], itins.rm>, 1650 Sched<[itins.Sched.Folded]>; 1651} 1652 1653multiclass sse12_cvt_sint_3addr<bits<8> opc, RegisterClass SrcRC, 1654 RegisterClass DstRC, Intrinsic Int, X86MemOperand x86memop, 1655 PatFrag ld_frag, string asm, OpndItins itins, 1656 bit Is2Addr = 1> { 1657 def rr : SI<opc, MRMSrcReg, (outs DstRC:$dst), (ins DstRC:$src1, SrcRC:$src2), 1658 !if(Is2Addr, 1659 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), 1660 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 1661 [(set DstRC:$dst, (Int DstRC:$src1, SrcRC:$src2))], 1662 itins.rr>, Sched<[itins.Sched]>; 1663 def rm : SI<opc, MRMSrcMem, (outs DstRC:$dst), 1664 (ins DstRC:$src1, x86memop:$src2), 1665 !if(Is2Addr, 1666 !strconcat(asm, "\t{$src2, $dst|$dst, $src2}"), 1667 !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 1668 [(set DstRC:$dst, (Int DstRC:$src1, (ld_frag addr:$src2)))], 1669 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 1670} 1671 1672let Predicates = [UseAVX] in { 1673defm VCVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, 1674 int_x86_sse2_cvtsd2si, sdmem, sse_load_f64, "cvtsd2si", 1675 SSE_CVT_SD2SI>, XD, VEX, VEX_LIG; 1676defm VCVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, 1677 int_x86_sse2_cvtsd2si64, sdmem, sse_load_f64, "cvtsd2si", 1678 SSE_CVT_SD2SI>, XD, VEX, VEX_W, VEX_LIG; 1679} 1680defm CVTSD2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse2_cvtsd2si, 1681 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD; 1682defm CVTSD2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse2_cvtsd2si64, 1683 sdmem, sse_load_f64, "cvtsd2si", SSE_CVT_SD2SI>, XD, REX_W; 1684 1685 1686let isCodeGenOnly = 1 in { 1687 let Predicates = [UseAVX] in { 1688 defm Int_VCVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1689 int_x86_sse_cvtsi2ss, i32mem, loadi32, "cvtsi2ss{l}", 1690 SSE_CVT_Scalar, 0>, XS, VEX_4V; 1691 defm Int_VCVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1692 int_x86_sse_cvtsi642ss, i64mem, loadi64, "cvtsi2ss{q}", 1693 SSE_CVT_Scalar, 0>, XS, VEX_4V, 1694 VEX_W; 1695 defm Int_VCVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1696 int_x86_sse2_cvtsi2sd, i32mem, loadi32, "cvtsi2sd{l}", 1697 SSE_CVT_Scalar, 0>, XD, VEX_4V; 1698 defm Int_VCVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1699 int_x86_sse2_cvtsi642sd, i64mem, loadi64, "cvtsi2sd{q}", 1700 SSE_CVT_Scalar, 0>, XD, 1701 VEX_4V, VEX_W; 1702 } 1703 let Constraints = "$src1 = $dst" in { 1704 defm Int_CVTSI2SS : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 
1705 int_x86_sse_cvtsi2ss, i32mem, loadi32, 1706 "cvtsi2ss{l}", SSE_CVT_Scalar>, XS; 1707 defm Int_CVTSI2SS64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1708 int_x86_sse_cvtsi642ss, i64mem, loadi64, 1709 "cvtsi2ss{q}", SSE_CVT_Scalar>, XS, REX_W; 1710 defm Int_CVTSI2SD : sse12_cvt_sint_3addr<0x2A, GR32, VR128, 1711 int_x86_sse2_cvtsi2sd, i32mem, loadi32, 1712 "cvtsi2sd{l}", SSE_CVT_Scalar>, XD; 1713 defm Int_CVTSI2SD64 : sse12_cvt_sint_3addr<0x2A, GR64, VR128, 1714 int_x86_sse2_cvtsi642sd, i64mem, loadi64, 1715 "cvtsi2sd{q}", SSE_CVT_Scalar>, XD, REX_W; 1716 } 1717} // isCodeGenOnly = 1 1718 1719/// SSE 1 Only 1720 1721// Aliases for intrinsics 1722let isCodeGenOnly = 1 in { 1723let Predicates = [UseAVX] in { 1724defm Int_VCVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, 1725 ssmem, sse_load_f32, "cvttss2si", 1726 SSE_CVT_SS2SI_32>, XS, VEX; 1727defm Int_VCVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, 1728 int_x86_sse_cvttss2si64, ssmem, sse_load_f32, 1729 "cvttss2si", SSE_CVT_SS2SI_64>, 1730 XS, VEX, VEX_W; 1731defm Int_VCVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, 1732 sdmem, sse_load_f64, "cvttsd2si", 1733 SSE_CVT_SD2SI>, XD, VEX; 1734defm Int_VCVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, 1735 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, 1736 "cvttsd2si", SSE_CVT_SD2SI>, 1737 XD, VEX, VEX_W; 1738} 1739defm Int_CVTTSS2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse_cvttss2si, 1740 ssmem, sse_load_f32, "cvttss2si", 1741 SSE_CVT_SS2SI_32>, XS; 1742defm Int_CVTTSS2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, 1743 int_x86_sse_cvttss2si64, ssmem, sse_load_f32, 1744 "cvttss2si", SSE_CVT_SS2SI_64>, XS, REX_W; 1745defm Int_CVTTSD2SI : sse12_cvt_sint<0x2C, VR128, GR32, int_x86_sse2_cvttsd2si, 1746 sdmem, sse_load_f64, "cvttsd2si", 1747 SSE_CVT_SD2SI>, XD; 1748defm Int_CVTTSD2SI64 : sse12_cvt_sint<0x2C, VR128, GR64, 1749 int_x86_sse2_cvttsd2si64, sdmem, sse_load_f64, 1750 "cvttsd2si", SSE_CVT_SD2SI>, XD, REX_W; 1751} // isCodeGenOnly = 1 1752 1753let Predicates = [UseAVX] in { 1754defm VCVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, 1755 ssmem, sse_load_f32, "cvtss2si", 1756 SSE_CVT_SS2SI_32>, XS, VEX, VEX_LIG; 1757defm VCVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, 1758 ssmem, sse_load_f32, "cvtss2si", 1759 SSE_CVT_SS2SI_64>, XS, VEX, VEX_W, VEX_LIG; 1760} 1761defm CVTSS2SI : sse12_cvt_sint<0x2D, VR128, GR32, int_x86_sse_cvtss2si, 1762 ssmem, sse_load_f32, "cvtss2si", 1763 SSE_CVT_SS2SI_32>, XS; 1764defm CVTSS2SI64 : sse12_cvt_sint<0x2D, VR128, GR64, int_x86_sse_cvtss2si64, 1765 ssmem, sse_load_f32, "cvtss2si", 1766 SSE_CVT_SS2SI_64>, XS, REX_W; 1767 1768defm VCVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, 1769 "vcvtdq2ps\t{$src, $dst|$dst, $src}", 1770 SSEPackedSingle, SSE_CVT_PS>, 1771 PS, VEX, Requires<[HasAVX]>; 1772defm VCVTDQ2PSY : sse12_cvt_p<0x5B, VR256, VR256, i256mem, 1773 "vcvtdq2ps\t{$src, $dst|$dst, $src}", 1774 SSEPackedSingle, SSE_CVT_PS>, 1775 PS, VEX, VEX_L, Requires<[HasAVX]>; 1776 1777defm CVTDQ2PS : sse12_cvt_p<0x5B, VR128, VR128, i128mem, 1778 "cvtdq2ps\t{$src, $dst|$dst, $src}", 1779 SSEPackedSingle, SSE_CVT_PS>, 1780 PS, Requires<[UseSSE2]>; 1781 1782let Predicates = [UseAVX] in { 1783def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 1784 (VCVTSS2SIrr GR32:$dst, VR128:$src), 0>; 1785def : InstAlias<"vcvtss2si{l}\t{$src, $dst|$dst, $src}", 1786 (VCVTSS2SIrm GR32:$dst, ssmem:$src), 0>; 1787def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}", 1788 (VCVTSD2SIrr 
GR32:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtss2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"vcvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (VCVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
}

def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSS2SIrm GR32:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrr GR32:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{l}\t{$src, $dst|$dst, $src}",
                (CVTSD2SIrm GR32:$dst, sdmem:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSS2SI64rm GR64:$dst, ssmem:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
                (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;

/// SSE 2 Only

// Convert scalar double to scalar single
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSD2SSrr  : VSDI<0x5A, MRMSrcReg, (outs FR32:$dst),
                        (ins FR64:$src1, FR64:$src2),
                        "cvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", [],
                        IIC_SSE_CVT_Scalar_RR>, VEX_4V, VEX_LIG,
                        Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst),
                     (ins FR64:$src1, f64mem:$src2),
                     "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [], IIC_SSE_CVT_Scalar_RM>,
                     XD, Requires<[HasAVX, OptForSize]>, VEX_4V, VEX_LIG,
                     Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f32 (fround FR64:$src)), (VCVTSD2SSrr FR64:$src, FR64:$src)>,
          Requires<[UseAVX]>;

def CVTSD2SSrr  : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
                      "cvtsd2ss\t{$src, $dst|$dst, $src}",
                      [(set FR32:$dst, (fround FR64:$src))],
                      IIC_SSE_CVT_Scalar_RR>, Sched<[WriteCvtF2F]>;
def CVTSD2SSrm  : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
                    "cvtsd2ss\t{$src, $dst|$dst, $src}",
                    [(set FR32:$dst, (fround (loadf64 addr:$src)))],
                    IIC_SSE_CVT_Scalar_RM>,
                    XD,
                    Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

let isCodeGenOnly = 1 in {
def Int_VCVTSD2SSrr: I<0x5A, MRMSrcReg,
                       (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst,
                         (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                       IIC_SSE_CVT_Scalar_RR>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2F]>;
def Int_VCVTSD2SSrm: I<0x5A, MRMSrcMem,
                       (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                       "vcvtsd2ss\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                       [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                          VR128:$src1, sse_load_f64:$src2))],
                       IIC_SSE_CVT_Scalar_RM>, XD, VEX_4V, Requires<[UseAVX]>,
                       Sched<[WriteCvtF2FLd, ReadAfterLd]>;

let Constraints = "$src1 = $dst" in {
def Int_CVTSD2SSrr: I<0x5A, MRMSrcReg,
                      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst,
                        (int_x86_sse2_cvtsd2ss VR128:$src1, VR128:$src2))],
                      IIC_SSE_CVT_Scalar_RR>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2F]>;
def Int_CVTSD2SSrm: I<0x5A, MRMSrcMem,
                      (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2),
                      "cvtsd2ss\t{$src2, $dst|$dst, $src2}",
                      [(set VR128:$dst, (int_x86_sse2_cvtsd2ss
                                         VR128:$src1, sse_load_f64:$src2))],
                      IIC_SSE_CVT_Scalar_RM>, XD, Requires<[UseSSE2]>,
                      Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}
} // isCodeGenOnly = 1

// Convert scalar single to scalar double
// SSE2 instructions with XS prefix
let hasSideEffects = 0, Predicates = [UseAVX] in {
def VCVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst),
                    (ins FR32:$src1, FR32:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RR>,
                    XS, Requires<[HasAVX]>, VEX_4V, VEX_LIG,
                    Sched<[WriteCvtF2F]>;
let mayLoad = 1 in
def VCVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst),
                    (ins FR32:$src1, f32mem:$src2),
                    "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [], IIC_SSE_CVT_Scalar_RM>,
                    XS, VEX_4V, VEX_LIG, Requires<[HasAVX, OptForSize]>,
                    Sched<[WriteCvtF2FLd, ReadAfterLd]>;
}

def : Pat<(f64 (fextend FR32:$src)),
          (VCVTSS2SDrr FR32:$src, FR32:$src)>, Requires<[UseAVX]>;
def : Pat<(fextend (loadf32 addr:$src)),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>, Requires<[UseAVX]>;

def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrm (f32 (IMPLICIT_DEF)), addr:$src)>,
          Requires<[UseAVX, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
          (VCVTSS2SDrr (f32 (IMPLICIT_DEF)), (VMOVSSrm addr:$src))>,
          Requires<[UseAVX, OptForSpeed]>;

def CVTSS2SDrr : I<0x5A, MRMSrcReg, (outs FR64:$dst), (ins FR32:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (fextend FR32:$src))],
                   IIC_SSE_CVT_Scalar_RR>, XS,
                   Requires<[UseSSE2]>, Sched<[WriteCvtF2F]>;
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
                   "cvtss2sd\t{$src, $dst|$dst, $src}",
                   [(set FR64:$dst, (extloadf32 addr:$src))],
                   IIC_SSE_CVT_Scalar_RM>, XS,
                   Requires<[UseSSE2, OptForSize]>, Sched<[WriteCvtF2FLd]>;

// extload f32 -> f64. This matches load+fextend because we have a hack in
// the isel (PreprocessForFPConvert) that can introduce loads after dag
// combine.
// Since these loads aren't folded into the fextend, we have to match it
// explicitly here.
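// For example (illustrative): with OptForSize the load folds into
// "cvtss2sd (%rax), %xmm0", while with OptForSpeed the extloadf32 patterns
// nearby load with a separate "movss" first, presumably to sidestep the
// memory form's partial register update of the destination.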
1930def : Pat<(fextend (loadf32 addr:$src)), 1931 (CVTSS2SDrm addr:$src)>, Requires<[UseSSE2]>; 1932def : Pat<(extloadf32 addr:$src), 1933 (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[UseSSE2, OptForSpeed]>; 1934 1935let isCodeGenOnly = 1 in { 1936def Int_VCVTSS2SDrr: I<0x5A, MRMSrcReg, 1937 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1938 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1939 [(set VR128:$dst, 1940 (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], 1941 IIC_SSE_CVT_Scalar_RR>, XS, VEX_4V, Requires<[UseAVX]>, 1942 Sched<[WriteCvtF2F]>; 1943def Int_VCVTSS2SDrm: I<0x5A, MRMSrcMem, 1944 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1945 "vcvtss2sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 1946 [(set VR128:$dst, 1947 (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], 1948 IIC_SSE_CVT_Scalar_RM>, XS, VEX_4V, Requires<[UseAVX]>, 1949 Sched<[WriteCvtF2FLd, ReadAfterLd]>; 1950let Constraints = "$src1 = $dst" in { // SSE2 instructions with XS prefix 1951def Int_CVTSS2SDrr: I<0x5A, MRMSrcReg, 1952 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 1953 "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1954 [(set VR128:$dst, 1955 (int_x86_sse2_cvtss2sd VR128:$src1, VR128:$src2))], 1956 IIC_SSE_CVT_Scalar_RR>, XS, Requires<[UseSSE2]>, 1957 Sched<[WriteCvtF2F]>; 1958def Int_CVTSS2SDrm: I<0x5A, MRMSrcMem, 1959 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2), 1960 "cvtss2sd\t{$src2, $dst|$dst, $src2}", 1961 [(set VR128:$dst, 1962 (int_x86_sse2_cvtss2sd VR128:$src1, sse_load_f32:$src2))], 1963 IIC_SSE_CVT_Scalar_RM>, XS, Requires<[UseSSE2]>, 1964 Sched<[WriteCvtF2FLd, ReadAfterLd]>; 1965} 1966} // isCodeGenOnly = 1 1967 1968// Convert packed single/double fp to doubleword 1969def VCVTPS2DQrr : VPDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1970 "cvtps2dq\t{$src, $dst|$dst, $src}", 1971 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], 1972 IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; 1973def VCVTPS2DQrm : VPDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1974 "cvtps2dq\t{$src, $dst|$dst, $src}", 1975 [(set VR128:$dst, 1976 (int_x86_sse2_cvtps2dq (loadv4f32 addr:$src)))], 1977 IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; 1978def VCVTPS2DQYrr : VPDI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 1979 "cvtps2dq\t{$src, $dst|$dst, $src}", 1980 [(set VR256:$dst, 1981 (int_x86_avx_cvt_ps2dq_256 VR256:$src))], 1982 IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; 1983def VCVTPS2DQYrm : VPDI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 1984 "cvtps2dq\t{$src, $dst|$dst, $src}", 1985 [(set VR256:$dst, 1986 (int_x86_avx_cvt_ps2dq_256 (loadv8f32 addr:$src)))], 1987 IIC_SSE_CVT_PS_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; 1988def CVTPS2DQrr : PDI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 1989 "cvtps2dq\t{$src, $dst|$dst, $src}", 1990 [(set VR128:$dst, (int_x86_sse2_cvtps2dq VR128:$src))], 1991 IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; 1992def CVTPS2DQrm : PDI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 1993 "cvtps2dq\t{$src, $dst|$dst, $src}", 1994 [(set VR128:$dst, 1995 (int_x86_sse2_cvtps2dq (memopv4f32 addr:$src)))], 1996 IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; 1997 1998 1999// Convert Packed Double FP to Packed DW Integers 2000let Predicates = [HasAVX] in { 2001// The assembler can recognize rr 256-bit instructions by seeing a ymm 2002// register, but the same isn't true when using memory operands instead. 2003// Provide other assembly rr and rm forms to address this explicitly. 
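// For example: "vcvtpd2dq %ymm0, %xmm0" is unambiguously the 256-bit form,
// but in "vcvtpd2dq (%rax), %xmm0" the memory operand carries no width, so
// the explicit "vcvtpd2dqx"/"vcvtpd2dq{y}" spellings defined below are needed
// to pick an encoding (illustrative assembly).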
2004def VCVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2005 "vcvtpd2dq\t{$src, $dst|$dst, $src}", 2006 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))]>, 2007 VEX, Sched<[WriteCvtF2I]>; 2008 2009// XMM only 2010def : InstAlias<"vcvtpd2dqx\t{$src, $dst|$dst, $src}", 2011 (VCVTPD2DQrr VR128:$dst, VR128:$src), 0>; 2012def VCVTPD2DQXrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2013 "vcvtpd2dqx\t{$src, $dst|$dst, $src}", 2014 [(set VR128:$dst, 2015 (int_x86_sse2_cvtpd2dq (loadv2f64 addr:$src)))]>, VEX, 2016 Sched<[WriteCvtF2ILd]>; 2017 2018// YMM only 2019def VCVTPD2DQYrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 2020 "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 2021 [(set VR128:$dst, 2022 (int_x86_avx_cvt_pd2dq_256 VR256:$src))]>, VEX, VEX_L, 2023 Sched<[WriteCvtF2I]>; 2024def VCVTPD2DQYrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 2025 "vcvtpd2dq{y}\t{$src, $dst|$dst, $src}", 2026 [(set VR128:$dst, 2027 (int_x86_avx_cvt_pd2dq_256 (loadv4f64 addr:$src)))]>, 2028 VEX, VEX_L, Sched<[WriteCvtF2ILd]>; 2029def : InstAlias<"vcvtpd2dq\t{$src, $dst|$dst, $src}", 2030 (VCVTPD2DQYrr VR128:$dst, VR256:$src), 0>; 2031} 2032 2033def CVTPD2DQrm : SDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2034 "cvtpd2dq\t{$src, $dst|$dst, $src}", 2035 [(set VR128:$dst, 2036 (int_x86_sse2_cvtpd2dq (memopv2f64 addr:$src)))], 2037 IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2ILd]>; 2038def CVTPD2DQrr : SDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2039 "cvtpd2dq\t{$src, $dst|$dst, $src}", 2040 [(set VR128:$dst, (int_x86_sse2_cvtpd2dq VR128:$src))], 2041 IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; 2042 2043// Convert with truncation packed single/double fp to doubleword 2044// SSE2 packed instructions with XS prefix 2045def VCVTTPS2DQrr : VS2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2046 "cvttps2dq\t{$src, $dst|$dst, $src}", 2047 [(set VR128:$dst, 2048 (int_x86_sse2_cvttps2dq VR128:$src))], 2049 IIC_SSE_CVT_PS_RR>, VEX, Sched<[WriteCvtF2I]>; 2050def VCVTTPS2DQrm : VS2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2051 "cvttps2dq\t{$src, $dst|$dst, $src}", 2052 [(set VR128:$dst, (int_x86_sse2_cvttps2dq 2053 (loadv4f32 addr:$src)))], 2054 IIC_SSE_CVT_PS_RM>, VEX, Sched<[WriteCvtF2ILd]>; 2055def VCVTTPS2DQYrr : VS2SI<0x5B, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 2056 "cvttps2dq\t{$src, $dst|$dst, $src}", 2057 [(set VR256:$dst, 2058 (int_x86_avx_cvtt_ps2dq_256 VR256:$src))], 2059 IIC_SSE_CVT_PS_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; 2060def VCVTTPS2DQYrm : VS2SI<0x5B, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 2061 "cvttps2dq\t{$src, $dst|$dst, $src}", 2062 [(set VR256:$dst, (int_x86_avx_cvtt_ps2dq_256 2063 (loadv8f32 addr:$src)))], 2064 IIC_SSE_CVT_PS_RM>, VEX, VEX_L, 2065 Sched<[WriteCvtF2ILd]>; 2066 2067def CVTTPS2DQrr : S2SI<0x5B, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2068 "cvttps2dq\t{$src, $dst|$dst, $src}", 2069 [(set VR128:$dst, (int_x86_sse2_cvttps2dq VR128:$src))], 2070 IIC_SSE_CVT_PS_RR>, Sched<[WriteCvtF2I]>; 2071def CVTTPS2DQrm : S2SI<0x5B, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2072 "cvttps2dq\t{$src, $dst|$dst, $src}", 2073 [(set VR128:$dst, 2074 (int_x86_sse2_cvttps2dq (memopv4f32 addr:$src)))], 2075 IIC_SSE_CVT_PS_RM>, Sched<[WriteCvtF2ILd]>; 2076 2077let Predicates = [HasAVX] in { 2078 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), 2079 (VCVTDQ2PSrr VR128:$src)>; 2080 def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))), 2081 
(VCVTDQ2PSrm addr:$src)>; 2082 2083 def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), 2084 (VCVTDQ2PSrr VR128:$src)>; 2085 def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (loadv2i64 addr:$src))), 2086 (VCVTDQ2PSrm addr:$src)>; 2087 2088 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), 2089 (VCVTTPS2DQrr VR128:$src)>; 2090 def : Pat<(v4i32 (fp_to_sint (loadv4f32 addr:$src))), 2091 (VCVTTPS2DQrm addr:$src)>; 2092 2093 def : Pat<(v8f32 (sint_to_fp (v8i32 VR256:$src))), 2094 (VCVTDQ2PSYrr VR256:$src)>; 2095 def : Pat<(v8f32 (sint_to_fp (bc_v8i32 (loadv4i64 addr:$src)))), 2096 (VCVTDQ2PSYrm addr:$src)>; 2097 2098 def : Pat<(v8i32 (fp_to_sint (v8f32 VR256:$src))), 2099 (VCVTTPS2DQYrr VR256:$src)>; 2100 def : Pat<(v8i32 (fp_to_sint (loadv8f32 addr:$src))), 2101 (VCVTTPS2DQYrm addr:$src)>; 2102} 2103 2104let Predicates = [UseSSE2] in { 2105 def : Pat<(v4f32 (sint_to_fp (v4i32 VR128:$src))), 2106 (CVTDQ2PSrr VR128:$src)>; 2107 def : Pat<(v4f32 (sint_to_fp (bc_v4i32 (memopv2i64 addr:$src)))), 2108 (CVTDQ2PSrm addr:$src)>; 2109 2110 def : Pat<(int_x86_sse2_cvtdq2ps VR128:$src), 2111 (CVTDQ2PSrr VR128:$src)>; 2112 def : Pat<(int_x86_sse2_cvtdq2ps (bc_v4i32 (memopv2i64 addr:$src))), 2113 (CVTDQ2PSrm addr:$src)>; 2114 2115 def : Pat<(v4i32 (fp_to_sint (v4f32 VR128:$src))), 2116 (CVTTPS2DQrr VR128:$src)>; 2117 def : Pat<(v4i32 (fp_to_sint (memopv4f32 addr:$src))), 2118 (CVTTPS2DQrm addr:$src)>; 2119} 2120 2121def VCVTTPD2DQrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2122 "cvttpd2dq\t{$src, $dst|$dst, $src}", 2123 [(set VR128:$dst, 2124 (int_x86_sse2_cvttpd2dq VR128:$src))], 2125 IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2I]>; 2126 2127// The assembler can recognize rr 256-bit instructions by seeing a ymm 2128// register, but the same isn't true when using memory operands instead. 2129// Provide other assembly rr and rm forms to address this explicitly. 
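// (Same x/y suffix scheme as for vcvtpd2dq above: "vcvttpd2dqx" names the
// 128-bit memory form, "vcvttpd2dq{y}" the 256-bit one.)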
2130 2131// XMM only 2132def : InstAlias<"vcvttpd2dqx\t{$src, $dst|$dst, $src}", 2133 (VCVTTPD2DQrr VR128:$dst, VR128:$src), 0>; 2134def VCVTTPD2DQXrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2135 "cvttpd2dqx\t{$src, $dst|$dst, $src}", 2136 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq 2137 (loadv2f64 addr:$src)))], 2138 IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2ILd]>; 2139 2140// YMM only 2141def VCVTTPD2DQYrr : VPDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 2142 "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", 2143 [(set VR128:$dst, 2144 (int_x86_avx_cvtt_pd2dq_256 VR256:$src))], 2145 IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2I]>; 2146def VCVTTPD2DQYrm : VPDI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 2147 "cvttpd2dq{y}\t{$src, $dst|$dst, $src}", 2148 [(set VR128:$dst, 2149 (int_x86_avx_cvtt_pd2dq_256 (loadv4f64 addr:$src)))], 2150 IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2ILd]>; 2151def : InstAlias<"vcvttpd2dq\t{$src, $dst|$dst, $src}", 2152 (VCVTTPD2DQYrr VR128:$dst, VR256:$src), 0>; 2153 2154let Predicates = [HasAVX] in { 2155 def : Pat<(v4i32 (fp_to_sint (v4f64 VR256:$src))), 2156 (VCVTTPD2DQYrr VR256:$src)>; 2157 def : Pat<(v4i32 (fp_to_sint (loadv4f64 addr:$src))), 2158 (VCVTTPD2DQYrm addr:$src)>; 2159} // Predicates = [HasAVX] 2160 2161def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2162 "cvttpd2dq\t{$src, $dst|$dst, $src}", 2163 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq VR128:$src))], 2164 IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2I]>; 2165def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src), 2166 "cvttpd2dq\t{$src, $dst|$dst, $src}", 2167 [(set VR128:$dst, (int_x86_sse2_cvttpd2dq 2168 (memopv2f64 addr:$src)))], 2169 IIC_SSE_CVT_PD_RM>, 2170 Sched<[WriteCvtF2ILd]>; 2171 2172// Convert packed single to packed double 2173let Predicates = [HasAVX] in { 2174 // SSE2 instructions without OpSize prefix 2175def VCVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2176 "vcvtps2pd\t{$src, $dst|$dst, $src}", 2177 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], 2178 IIC_SSE_CVT_PD_RR>, PS, VEX, Sched<[WriteCvtF2F]>; 2179def VCVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 2180 "vcvtps2pd\t{$src, $dst|$dst, $src}", 2181 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], 2182 IIC_SSE_CVT_PD_RM>, PS, VEX, Sched<[WriteCvtF2FLd]>; 2183def VCVTPS2PDYrr : I<0x5A, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), 2184 "vcvtps2pd\t{$src, $dst|$dst, $src}", 2185 [(set VR256:$dst, 2186 (int_x86_avx_cvt_ps2_pd_256 VR128:$src))], 2187 IIC_SSE_CVT_PD_RR>, PS, VEX, VEX_L, Sched<[WriteCvtF2F]>; 2188def VCVTPS2PDYrm : I<0x5A, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), 2189 "vcvtps2pd\t{$src, $dst|$dst, $src}", 2190 [(set VR256:$dst, 2191 (int_x86_avx_cvt_ps2_pd_256 (loadv4f32 addr:$src)))], 2192 IIC_SSE_CVT_PD_RM>, PS, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; 2193} 2194 2195let Predicates = [UseSSE2] in { 2196def CVTPS2PDrr : I<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2197 "cvtps2pd\t{$src, $dst|$dst, $src}", 2198 [(set VR128:$dst, (int_x86_sse2_cvtps2pd VR128:$src))], 2199 IIC_SSE_CVT_PD_RR>, PS, Sched<[WriteCvtF2F]>; 2200def CVTPS2PDrm : I<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), 2201 "cvtps2pd\t{$src, $dst|$dst, $src}", 2202 [(set VR128:$dst, (v2f64 (extloadv2f32 addr:$src)))], 2203 IIC_SSE_CVT_PD_RM>, PS, Sched<[WriteCvtF2FLd]>; 2204} 2205 2206// Convert Packed DW Integers to Packed Double FP 2207let Predicates = [HasAVX] in { 2208let 
hasSideEffects = 0, mayLoad = 1 in
def VCVTDQ2PDrm  : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        []>, VEX, Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDrr  : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
                          (int_x86_sse2_cvtdq2pd VR128:$src))]>, VEX,
                        Sched<[WriteCvtI2F]>;
def VCVTDQ2PDYrm : S2SI<0xE6, MRMSrcMem, (outs VR256:$dst), (ins i128mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256
                           (bitconvert (loadv2i64 addr:$src))))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2FLd]>;
def VCVTDQ2PDYrr : S2SI<0xE6, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR256:$dst,
                          (int_x86_avx_cvtdq2_pd_256 VR128:$src))]>, VEX, VEX_L,
                        Sched<[WriteCvtI2F]>;
}

let hasSideEffects = 0, mayLoad = 1 in
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}", [],
                      IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtI2FLd]>;
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "cvtdq2pd\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse2_cvtdq2pd VR128:$src))],
                      IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2F]>;

// AVX 256-bit register conversion intrinsics
let Predicates = [HasAVX] in {
  def : Pat<(v4f64 (sint_to_fp (v4i32 VR128:$src))),
            (VCVTDQ2PDYrr VR128:$src)>;
  def : Pat<(v4f64 (sint_to_fp (bc_v4i32 (loadv2i64 addr:$src)))),
            (VCVTDQ2PDYrm addr:$src)>;
} // Predicates = [HasAVX]

// Convert packed double to packed single
// The assembler can recognize rr 256-bit instructions by seeing a ymm
// register, but the same isn't true when using memory operands instead.
// Provide other assembly rr and rm forms to address this explicitly.
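// (Again the same scheme: "vcvtpd2psx" names the 128-bit memory form,
// "vcvtpd2ps{y}" the 256-bit one.)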
2251def VCVTPD2PSrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2252 "cvtpd2ps\t{$src, $dst|$dst, $src}", 2253 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], 2254 IIC_SSE_CVT_PD_RR>, VEX, Sched<[WriteCvtF2F]>; 2255 2256// XMM only 2257def : InstAlias<"vcvtpd2psx\t{$src, $dst|$dst, $src}", 2258 (VCVTPD2PSrr VR128:$dst, VR128:$src), 0>; 2259def VCVTPD2PSXrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2260 "cvtpd2psx\t{$src, $dst|$dst, $src}", 2261 [(set VR128:$dst, 2262 (int_x86_sse2_cvtpd2ps (loadv2f64 addr:$src)))], 2263 IIC_SSE_CVT_PD_RM>, VEX, Sched<[WriteCvtF2FLd]>; 2264 2265// YMM only 2266def VCVTPD2PSYrr : VPDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR256:$src), 2267 "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", 2268 [(set VR128:$dst, 2269 (int_x86_avx_cvt_pd2_ps_256 VR256:$src))], 2270 IIC_SSE_CVT_PD_RR>, VEX, VEX_L, Sched<[WriteCvtF2F]>; 2271def VCVTPD2PSYrm : VPDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f256mem:$src), 2272 "cvtpd2ps{y}\t{$src, $dst|$dst, $src}", 2273 [(set VR128:$dst, 2274 (int_x86_avx_cvt_pd2_ps_256 (loadv4f64 addr:$src)))], 2275 IIC_SSE_CVT_PD_RM>, VEX, VEX_L, Sched<[WriteCvtF2FLd]>; 2276def : InstAlias<"vcvtpd2ps\t{$src, $dst|$dst, $src}", 2277 (VCVTPD2PSYrr VR128:$dst, VR256:$src), 0>; 2278 2279def CVTPD2PSrr : PDI<0x5A, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 2280 "cvtpd2ps\t{$src, $dst|$dst, $src}", 2281 [(set VR128:$dst, (int_x86_sse2_cvtpd2ps VR128:$src))], 2282 IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtF2F]>; 2283def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 2284 "cvtpd2ps\t{$src, $dst|$dst, $src}", 2285 [(set VR128:$dst, 2286 (int_x86_sse2_cvtpd2ps (memopv2f64 addr:$src)))], 2287 IIC_SSE_CVT_PD_RM>, Sched<[WriteCvtF2FLd]>; 2288 2289 2290// AVX 256-bit register conversion intrinsics 2291// FIXME: Migrate SSE conversion intrinsics matching to use patterns as below 2292// whenever possible to avoid declaring two versions of each one. 
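// In other words (sketch): keep one instruction definition whose pattern
// matches the generic ISD node, and map the intrinsic onto it with a separate
// "def : Pat", as the int_x86_avx_cvtdq2_ps_256 patterns below do, instead of
// declaring a second isCodeGenOnly instruction per intrinsic.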
2293let Predicates = [HasAVX] in { 2294 def : Pat<(int_x86_avx_cvtdq2_ps_256 VR256:$src), 2295 (VCVTDQ2PSYrr VR256:$src)>; 2296 def : Pat<(int_x86_avx_cvtdq2_ps_256 (bitconvert (loadv4i64 addr:$src))), 2297 (VCVTDQ2PSYrm addr:$src)>; 2298 2299 // Match fround and fextend for 128/256-bit conversions 2300 def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), 2301 (VCVTPD2PSrr VR128:$src)>; 2302 def : Pat<(v4f32 (X86vfpround (loadv2f64 addr:$src))), 2303 (VCVTPD2PSXrm addr:$src)>; 2304 def : Pat<(v4f32 (fround (v4f64 VR256:$src))), 2305 (VCVTPD2PSYrr VR256:$src)>; 2306 def : Pat<(v4f32 (fround (loadv4f64 addr:$src))), 2307 (VCVTPD2PSYrm addr:$src)>; 2308 2309 def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), 2310 (VCVTPS2PDrr VR128:$src)>; 2311 def : Pat<(v4f64 (fextend (v4f32 VR128:$src))), 2312 (VCVTPS2PDYrr VR128:$src)>; 2313 def : Pat<(v4f64 (extloadv4f32 addr:$src)), 2314 (VCVTPS2PDYrm addr:$src)>; 2315} 2316 2317let Predicates = [UseSSE2] in { 2318 // Match fround and fextend for 128 conversions 2319 def : Pat<(v4f32 (X86vfpround (v2f64 VR128:$src))), 2320 (CVTPD2PSrr VR128:$src)>; 2321 def : Pat<(v4f32 (X86vfpround (memopv2f64 addr:$src))), 2322 (CVTPD2PSrm addr:$src)>; 2323 2324 def : Pat<(v2f64 (X86vfpext (v4f32 VR128:$src))), 2325 (CVTPS2PDrr VR128:$src)>; 2326} 2327 2328//===----------------------------------------------------------------------===// 2329// SSE 1 & 2 - Compare Instructions 2330//===----------------------------------------------------------------------===// 2331 2332// sse12_cmp_scalar - sse 1 & 2 compare scalar instructions 2333multiclass sse12_cmp_scalar<RegisterClass RC, X86MemOperand x86memop, 2334 Operand CC, SDNode OpNode, ValueType VT, 2335 PatFrag ld_frag, string asm, string asm_alt, 2336 OpndItins itins, ImmLeaf immLeaf> { 2337 def rr : SIi8<0xC2, MRMSrcReg, 2338 (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, 2339 [(set RC:$dst, (OpNode (VT RC:$src1), RC:$src2, immLeaf:$cc))], 2340 itins.rr>, Sched<[itins.Sched]>; 2341 def rm : SIi8<0xC2, MRMSrcMem, 2342 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, 2343 [(set RC:$dst, (OpNode (VT RC:$src1), 2344 (ld_frag addr:$src2), immLeaf:$cc))], 2345 itins.rm>, 2346 Sched<[itins.Sched.Folded, ReadAfterLd]>; 2347 2348 // Accept explicit immediate argument form instead of comparison code. 
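  // For example (illustrative): "cmpss $1, %xmm1, %xmm0" assembles to the
  // same encoding as "cmpltss %xmm1, %xmm0"; the *_alt forms below exist only
  // so the assembler also accepts the raw-immediate spelling.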
2349 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2350 def rr_alt : SIi8<0xC2, MRMSrcReg, (outs RC:$dst), 2351 (ins RC:$src1, RC:$src2, u8imm:$cc), asm_alt, [], 2352 IIC_SSE_ALU_F32S_RR>, Sched<[itins.Sched]>; 2353 let mayLoad = 1 in 2354 def rm_alt : SIi8<0xC2, MRMSrcMem, (outs RC:$dst), 2355 (ins RC:$src1, x86memop:$src2, u8imm:$cc), asm_alt, [], 2356 IIC_SSE_ALU_F32S_RM>, 2357 Sched<[itins.Sched.Folded, ReadAfterLd]>; 2358 } 2359} 2360 2361defm VCMPSS : sse12_cmp_scalar<FR32, f32mem, AVXCC, X86cmps, f32, loadf32, 2362 "cmp${cc}ss\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2363 "cmpss\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2364 SSE_ALU_F32S, i8immZExt5>, XS, VEX_4V, VEX_LIG; 2365defm VCMPSD : sse12_cmp_scalar<FR64, f64mem, AVXCC, X86cmps, f64, loadf64, 2366 "cmp${cc}sd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2367 "cmpsd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2368 SSE_ALU_F32S, i8immZExt5>, // same latency as 32 bit compare 2369 XD, VEX_4V, VEX_LIG; 2370 2371let Constraints = "$src1 = $dst" in { 2372 defm CMPSS : sse12_cmp_scalar<FR32, f32mem, SSECC, X86cmps, f32, loadf32, 2373 "cmp${cc}ss\t{$src2, $dst|$dst, $src2}", 2374 "cmpss\t{$cc, $src2, $dst|$dst, $src2, $cc}", SSE_ALU_F32S, 2375 i8immZExt3>, XS; 2376 defm CMPSD : sse12_cmp_scalar<FR64, f64mem, SSECC, X86cmps, f64, loadf64, 2377 "cmp${cc}sd\t{$src2, $dst|$dst, $src2}", 2378 "cmpsd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 2379 SSE_ALU_F64S, i8immZExt3>, XD; 2380} 2381 2382multiclass sse12_cmp_scalar_int<X86MemOperand x86memop, Operand CC, 2383 Intrinsic Int, string asm, OpndItins itins, 2384 ImmLeaf immLeaf> { 2385 def rr : SIi8<0xC2, MRMSrcReg, (outs VR128:$dst), 2386 (ins VR128:$src1, VR128:$src, CC:$cc), asm, 2387 [(set VR128:$dst, (Int VR128:$src1, 2388 VR128:$src, immLeaf:$cc))], 2389 itins.rr>, 2390 Sched<[itins.Sched]>; 2391 def rm : SIi8<0xC2, MRMSrcMem, (outs VR128:$dst), 2392 (ins VR128:$src1, x86memop:$src, CC:$cc), asm, 2393 [(set VR128:$dst, (Int VR128:$src1, 2394 (load addr:$src), immLeaf:$cc))], 2395 itins.rm>, 2396 Sched<[itins.Sched.Folded, ReadAfterLd]>; 2397} 2398 2399let isCodeGenOnly = 1 in { 2400 // Aliases to match intrinsics which expect XMM operand(s). 
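  // (These use VR128 rather than FR32/FR64 because the intrinsics, e.g.
  // int_x86_sse_cmp_ss, are defined over the whole vector, with the upper
  // elements passed through from $src1.)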
2401 defm Int_VCMPSS : sse12_cmp_scalar_int<f32mem, AVXCC, int_x86_sse_cmp_ss, 2402 "cmp${cc}ss\t{$src, $src1, $dst|$dst, $src1, $src}", 2403 SSE_ALU_F32S, i8immZExt5>, 2404 XS, VEX_4V; 2405 defm Int_VCMPSD : sse12_cmp_scalar_int<f64mem, AVXCC, int_x86_sse2_cmp_sd, 2406 "cmp${cc}sd\t{$src, $src1, $dst|$dst, $src1, $src}", 2407 SSE_ALU_F32S, i8immZExt5>, // same latency as f32 2408 XD, VEX_4V; 2409 let Constraints = "$src1 = $dst" in { 2410 defm Int_CMPSS : sse12_cmp_scalar_int<f32mem, SSECC, int_x86_sse_cmp_ss, 2411 "cmp${cc}ss\t{$src, $dst|$dst, $src}", 2412 SSE_ALU_F32S, i8immZExt3>, XS; 2413 defm Int_CMPSD : sse12_cmp_scalar_int<f64mem, SSECC, int_x86_sse2_cmp_sd, 2414 "cmp${cc}sd\t{$src, $dst|$dst, $src}", 2415 SSE_ALU_F64S, i8immZExt3>, 2416 XD; 2417} 2418} 2419 2420 2421// sse12_ord_cmp - Unordered/Ordered scalar fp compare and set EFLAGS 2422multiclass sse12_ord_cmp<bits<8> opc, RegisterClass RC, SDNode OpNode, 2423 ValueType vt, X86MemOperand x86memop, 2424 PatFrag ld_frag, string OpcodeStr> { 2425 def rr: SI<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2), 2426 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 2427 [(set EFLAGS, (OpNode (vt RC:$src1), RC:$src2))], 2428 IIC_SSE_COMIS_RR>, 2429 Sched<[WriteFAdd]>; 2430 def rm: SI<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2), 2431 !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"), 2432 [(set EFLAGS, (OpNode (vt RC:$src1), 2433 (ld_frag addr:$src2)))], 2434 IIC_SSE_COMIS_RM>, 2435 Sched<[WriteFAddLd, ReadAfterLd]>; 2436} 2437 2438let Defs = [EFLAGS] in { 2439 defm VUCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 2440 "ucomiss">, PS, VEX, VEX_LIG; 2441 defm VUCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 2442 "ucomisd">, PD, VEX, VEX_LIG; 2443 let Pattern = []<dag> in { 2444 defm VCOMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, 2445 "comiss">, PS, VEX, VEX_LIG; 2446 defm VCOMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, 2447 "comisd">, PD, VEX, VEX_LIG; 2448 } 2449 2450 let isCodeGenOnly = 1 in { 2451 defm Int_VUCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, 2452 load, "ucomiss">, PS, VEX; 2453 defm Int_VUCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, 2454 load, "ucomisd">, PD, VEX; 2455 2456 defm Int_VCOMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, 2457 load, "comiss">, PS, VEX; 2458 defm Int_VCOMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, 2459 load, "comisd">, PD, VEX; 2460 } 2461 defm UCOMISS : sse12_ord_cmp<0x2E, FR32, X86cmp, f32, f32mem, loadf32, 2462 "ucomiss">, PS; 2463 defm UCOMISD : sse12_ord_cmp<0x2E, FR64, X86cmp, f64, f64mem, loadf64, 2464 "ucomisd">, PD; 2465 2466 let Pattern = []<dag> in { 2467 defm COMISS : sse12_ord_cmp<0x2F, VR128, undef, v4f32, f128mem, load, 2468 "comiss">, PS; 2469 defm COMISD : sse12_ord_cmp<0x2F, VR128, undef, v2f64, f128mem, load, 2470 "comisd">, PD; 2471 } 2472 2473 let isCodeGenOnly = 1 in { 2474 defm Int_UCOMISS : sse12_ord_cmp<0x2E, VR128, X86ucomi, v4f32, f128mem, 2475 load, "ucomiss">, PS; 2476 defm Int_UCOMISD : sse12_ord_cmp<0x2E, VR128, X86ucomi, v2f64, f128mem, 2477 load, "ucomisd">, PD; 2478 2479 defm Int_COMISS : sse12_ord_cmp<0x2F, VR128, X86comi, v4f32, f128mem, load, 2480 "comiss">, PS; 2481 defm Int_COMISD : sse12_ord_cmp<0x2F, VR128, X86comi, v2f64, f128mem, load, 2482 "comisd">, PD; 2483 } 2484} // Defs = [EFLAGS] 2485 2486// sse12_cmp_packed - sse 1 & 2 compare packed instructions 2487multiclass sse12_cmp_packed<RegisterClass RC, 
X86MemOperand x86memop, 2488 Operand CC, Intrinsic Int, string asm, 2489 string asm_alt, Domain d, ImmLeaf immLeaf, 2490 PatFrag ld_frag, OpndItins itins = SSE_ALU_F32P> { 2491 let isCommutable = 1 in 2492 def rri : PIi8<0xC2, MRMSrcReg, 2493 (outs RC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, 2494 [(set RC:$dst, (Int RC:$src1, RC:$src2, immLeaf:$cc))], 2495 itins.rr, d>, 2496 Sched<[WriteFAdd]>; 2497 def rmi : PIi8<0xC2, MRMSrcMem, 2498 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), asm, 2499 [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2), immLeaf:$cc))], 2500 itins.rm, d>, 2501 Sched<[WriteFAddLd, ReadAfterLd]>; 2502 2503 // Accept explicit immediate argument form instead of comparison code. 2504 let isAsmParserOnly = 1, hasSideEffects = 0 in { 2505 def rri_alt : PIi8<0xC2, MRMSrcReg, 2506 (outs RC:$dst), (ins RC:$src1, RC:$src2, u8imm:$cc), 2507 asm_alt, [], itins.rr, d>, Sched<[WriteFAdd]>; 2508 let mayLoad = 1 in 2509 def rmi_alt : PIi8<0xC2, MRMSrcMem, 2510 (outs RC:$dst), (ins RC:$src1, x86memop:$src2, u8imm:$cc), 2511 asm_alt, [], itins.rm, d>, 2512 Sched<[WriteFAddLd, ReadAfterLd]>; 2513 } 2514} 2515 2516defm VCMPPS : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse_cmp_ps, 2517 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2518 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2519 SSEPackedSingle, i8immZExt5, loadv4f32>, PS, VEX_4V; 2520defm VCMPPD : sse12_cmp_packed<VR128, f128mem, AVXCC, int_x86_sse2_cmp_pd, 2521 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2522 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2523 SSEPackedDouble, i8immZExt5, loadv2f64>, PD, VEX_4V; 2524defm VCMPPSY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_ps_256, 2525 "cmp${cc}ps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2526 "cmpps\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2527 SSEPackedSingle, i8immZExt5, loadv8f32>, PS, VEX_4V, VEX_L; 2528defm VCMPPDY : sse12_cmp_packed<VR256, f256mem, AVXCC, int_x86_avx_cmp_pd_256, 2529 "cmp${cc}pd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2530 "cmppd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}", 2531 SSEPackedDouble, i8immZExt5, loadv4f64>, PD, VEX_4V, VEX_L; 2532let Constraints = "$src1 = $dst" in { 2533 defm CMPPS : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse_cmp_ps, 2534 "cmp${cc}ps\t{$src2, $dst|$dst, $src2}", 2535 "cmpps\t{$cc, $src2, $dst|$dst, $src2, $cc}", 2536 SSEPackedSingle, i8immZExt5, memopv4f32, SSE_ALU_F32P>, PS; 2537 defm CMPPD : sse12_cmp_packed<VR128, f128mem, SSECC, int_x86_sse2_cmp_pd, 2538 "cmp${cc}pd\t{$src2, $dst|$dst, $src2}", 2539 "cmppd\t{$cc, $src2, $dst|$dst, $src2, $cc}", 2540 SSEPackedDouble, i8immZExt5, memopv2f64, SSE_ALU_F64P>, PD; 2541} 2542 2543let Predicates = [HasAVX] in { 2544def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), 2545 (VCMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; 2546def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (loadv4f32 addr:$src2), imm:$cc)), 2547 (VCMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; 2548def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), 2549 (VCMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; 2550def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (loadv2f64 addr:$src2), imm:$cc)), 2551 (VCMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 2552 2553def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), VR256:$src2, imm:$cc)), 2554 (VCMPPSYrri (v8f32 VR256:$src1), (v8f32 VR256:$src2), imm:$cc)>; 2555def : Pat<(v8i32 (X86cmpp (v8f32 VR256:$src1), (loadv8f32 
addr:$src2), imm:$cc)), 2556 (VCMPPSYrmi (v8f32 VR256:$src1), addr:$src2, imm:$cc)>; 2557def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), VR256:$src2, imm:$cc)), 2558 (VCMPPDYrri VR256:$src1, VR256:$src2, imm:$cc)>; 2559def : Pat<(v4i64 (X86cmpp (v4f64 VR256:$src1), (loadv4f64 addr:$src2), imm:$cc)), 2560 (VCMPPDYrmi VR256:$src1, addr:$src2, imm:$cc)>; 2561} 2562 2563let Predicates = [UseSSE1] in { 2564def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), VR128:$src2, imm:$cc)), 2565 (CMPPSrri (v4f32 VR128:$src1), (v4f32 VR128:$src2), imm:$cc)>; 2566def : Pat<(v4i32 (X86cmpp (v4f32 VR128:$src1), (memopv4f32 addr:$src2), imm:$cc)), 2567 (CMPPSrmi (v4f32 VR128:$src1), addr:$src2, imm:$cc)>; 2568} 2569 2570let Predicates = [UseSSE2] in { 2571def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), VR128:$src2, imm:$cc)), 2572 (CMPPDrri VR128:$src1, VR128:$src2, imm:$cc)>; 2573def : Pat<(v2i64 (X86cmpp (v2f64 VR128:$src1), (memopv2f64 addr:$src2), imm:$cc)), 2574 (CMPPDrmi VR128:$src1, addr:$src2, imm:$cc)>; 2575} 2576 2577//===----------------------------------------------------------------------===// 2578// SSE 1 & 2 - Shuffle Instructions 2579//===----------------------------------------------------------------------===// 2580 2581/// sse12_shuffle - sse 1 & 2 fp shuffle instructions 2582multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop, 2583 ValueType vt, string asm, PatFrag mem_frag, 2584 Domain d> { 2585 def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst), 2586 (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm, 2587 [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2), 2588 (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, 2589 Sched<[WriteFShuffleLd, ReadAfterLd]>; 2590 def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst), 2591 (ins RC:$src1, RC:$src2, u8imm:$src3), asm, 2592 [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2, 2593 (i8 imm:$src3))))], IIC_SSE_SHUFP, d>, 2594 Sched<[WriteFShuffle]>; 2595} 2596 2597defm VSHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2598 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2599 loadv4f32, SSEPackedSingle>, PS, VEX_4V; 2600defm VSHUFPSY : sse12_shuffle<VR256, f256mem, v8f32, 2601 "shufps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2602 loadv8f32, SSEPackedSingle>, PS, VEX_4V, VEX_L; 2603defm VSHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2604 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2605 loadv2f64, SSEPackedDouble>, PD, VEX_4V; 2606defm VSHUFPDY : sse12_shuffle<VR256, f256mem, v4f64, 2607 "shufpd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 2608 loadv4f64, SSEPackedDouble>, PD, VEX_4V, VEX_L; 2609 2610let Constraints = "$src1 = $dst" in { 2611 defm SHUFPS : sse12_shuffle<VR128, f128mem, v4f32, 2612 "shufps\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2613 memopv4f32, SSEPackedSingle>, PS; 2614 defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64, 2615 "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}", 2616 memopv2f64, SSEPackedDouble>, PD; 2617} 2618 2619let Predicates = [HasAVX] in { 2620 def : Pat<(v4i32 (X86Shufp VR128:$src1, 2621 (bc_v4i32 (loadv2i64 addr:$src2)), (i8 imm:$imm))), 2622 (VSHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; 2623 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2624 (VSHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; 2625 2626 def : Pat<(v2i64 (X86Shufp VR128:$src1, 2627 (loadv2i64 addr:$src2), (i8 imm:$imm))), 2628 (VSHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; 2629 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2630 (VSHUFPDrri 
VR128:$src1, VR128:$src2, imm:$imm)>; 2631 2632 // 256-bit patterns 2633 def : Pat<(v8i32 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), 2634 (VSHUFPSYrri VR256:$src1, VR256:$src2, imm:$imm)>; 2635 def : Pat<(v8i32 (X86Shufp VR256:$src1, 2636 (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 2637 (VSHUFPSYrmi VR256:$src1, addr:$src2, imm:$imm)>; 2638 2639 def : Pat<(v4i64 (X86Shufp VR256:$src1, VR256:$src2, (i8 imm:$imm))), 2640 (VSHUFPDYrri VR256:$src1, VR256:$src2, imm:$imm)>; 2641 def : Pat<(v4i64 (X86Shufp VR256:$src1, 2642 (loadv4i64 addr:$src2), (i8 imm:$imm))), 2643 (VSHUFPDYrmi VR256:$src1, addr:$src2, imm:$imm)>; 2644} 2645 2646let Predicates = [UseSSE1] in { 2647 def : Pat<(v4i32 (X86Shufp VR128:$src1, 2648 (bc_v4i32 (memopv2i64 addr:$src2)), (i8 imm:$imm))), 2649 (SHUFPSrmi VR128:$src1, addr:$src2, imm:$imm)>; 2650 def : Pat<(v4i32 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2651 (SHUFPSrri VR128:$src1, VR128:$src2, imm:$imm)>; 2652} 2653 2654let Predicates = [UseSSE2] in { 2655 // Generic SHUFPD patterns 2656 def : Pat<(v2i64 (X86Shufp VR128:$src1, 2657 (memopv2i64 addr:$src2), (i8 imm:$imm))), 2658 (SHUFPDrmi VR128:$src1, addr:$src2, imm:$imm)>; 2659 def : Pat<(v2i64 (X86Shufp VR128:$src1, VR128:$src2, (i8 imm:$imm))), 2660 (SHUFPDrri VR128:$src1, VR128:$src2, imm:$imm)>; 2661} 2662 2663//===----------------------------------------------------------------------===// 2664// SSE 1 & 2 - Unpack FP Instructions 2665//===----------------------------------------------------------------------===// 2666 2667/// sse12_unpack_interleave - sse 1 & 2 fp unpack and interleave 2668multiclass sse12_unpack_interleave<bits<8> opc, SDNode OpNode, ValueType vt, 2669 PatFrag mem_frag, RegisterClass RC, 2670 X86MemOperand x86memop, string asm, 2671 Domain d> { 2672 def rr : PI<opc, MRMSrcReg, 2673 (outs RC:$dst), (ins RC:$src1, RC:$src2), 2674 asm, [(set RC:$dst, 2675 (vt (OpNode RC:$src1, RC:$src2)))], 2676 IIC_SSE_UNPCK, d>, Sched<[WriteFShuffle]>; 2677 def rm : PI<opc, MRMSrcMem, 2678 (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 2679 asm, [(set RC:$dst, 2680 (vt (OpNode RC:$src1, 2681 (mem_frag addr:$src2))))], 2682 IIC_SSE_UNPCK, d>, 2683 Sched<[WriteFShuffleLd, ReadAfterLd]>; 2684} 2685 2686defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, 2687 VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2688 SSEPackedSingle>, PS, VEX_4V; 2689defm VUNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, loadv2f64, 2690 VR128, f128mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2691 SSEPackedDouble>, PD, VEX_4V; 2692defm VUNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, loadv4f32, 2693 VR128, f128mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2694 SSEPackedSingle>, PS, VEX_4V; 2695defm VUNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, loadv2f64, 2696 VR128, f128mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2697 SSEPackedDouble>, PD, VEX_4V; 2698 2699defm VUNPCKHPSY: sse12_unpack_interleave<0x15, X86Unpckh, v8f32, loadv8f32, 2700 VR256, f256mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2701 SSEPackedSingle>, PS, VEX_4V, VEX_L; 2702defm VUNPCKHPDY: sse12_unpack_interleave<0x15, X86Unpckh, v4f64, loadv4f64, 2703 VR256, f256mem, "unpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2704 SSEPackedDouble>, PD, VEX_4V, VEX_L; 2705defm VUNPCKLPSY: sse12_unpack_interleave<0x14, X86Unpckl, v8f32, loadv8f32, 2706 VR256, f256mem, "unpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", 2707 
                     SSEPackedSingle>, PS, VEX_4V, VEX_L;
defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64,
                     VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     SSEPackedDouble>, PD, VEX_4V, VEX_L;

let Constraints = "$src1 = $dst" in {
  defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32,
                       VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, PS;
  defm UNPCKHPD: sse12_unpack_interleave<0x15, X86Unpckh, v2f64, memopv2f64,
                       VR128, f128mem, "unpckhpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, PD;
  defm UNPCKLPS: sse12_unpack_interleave<0x14, X86Unpckl, v4f32, memopv4f32,
                       VR128, f128mem, "unpcklps\t{$src2, $dst|$dst, $src2}",
                       SSEPackedSingle>, PS;
  defm UNPCKLPD: sse12_unpack_interleave<0x14, X86Unpckl, v2f64, memopv2f64,
                       VR128, f128mem, "unpcklpd\t{$src2, $dst|$dst, $src2}",
                       SSEPackedDouble>, PD;
} // Constraints = "$src1 = $dst"

let Predicates = [HasAVX1Only] in {
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKLPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPSYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
            (VUNPCKHPSYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v8i32 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPSYrr VR256:$src1, VR256:$src2)>;

  def : Pat<(v4i64 (X86Unpckl VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKLPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckl VR256:$src1, VR256:$src2)),
            (VUNPCKLPDYrr VR256:$src1, VR256:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, (loadv4i64 addr:$src2))),
            (VUNPCKHPDYrm VR256:$src1, addr:$src2)>;
  def : Pat<(v4i64 (X86Unpckh VR256:$src1, VR256:$src2)),
            (VUNPCKHPDYrr VR256:$src1, VR256:$src2)>;
}

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Extract Floating-Point Sign Mask
//===----------------------------------------------------------------------===//

/// sse12_extr_sign_mask - sse 1 & 2 FP sign-mask extraction (movmskps/pd)
multiclass sse12_extr_sign_mask<RegisterClass RC, Intrinsic Int, string asm,
                                Domain d> {
  def rr : PI<0x50, MRMSrcReg, (outs GR32orGR64:$dst), (ins RC:$src),
              !strconcat(asm, "\t{$src, $dst|$dst, $src}"),
              [(set GR32orGR64:$dst, (Int RC:$src))], IIC_SSE_MOVMSK, d>,
              Sched<[WriteVecLogic]>;
}

let Predicates = [HasAVX] in {
  defm VMOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps,
                                        "movmskps", SSEPackedSingle>, PS, VEX;
  defm VMOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd,
                                        "movmskpd", SSEPackedDouble>, PD, VEX;
  defm VMOVMSKPSY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_ps_256,
                                         "movmskps", SSEPackedSingle>, PS,
                                         VEX, VEX_L;
  defm VMOVMSKPDY : sse12_extr_sign_mask<VR256, int_x86_avx_movmsk_pd_256,
                                         "movmskpd", SSEPackedDouble>, PD,
                                         VEX, VEX_L;

  def : Pat<(i32 (X86fgetsign FR32:$src)),
            (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR32:$src)),
            (SUBREG_TO_REG (i64 0),
             (VMOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>;
  def : Pat<(i32 (X86fgetsign FR64:$src)),
            (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
  def : Pat<(i64 (X86fgetsign FR64:$src)),
            (SUBREG_TO_REG (i64
0), 2781 (VMOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>; 2782} 2783 2784defm MOVMSKPS : sse12_extr_sign_mask<VR128, int_x86_sse_movmsk_ps, "movmskps", 2785 SSEPackedSingle>, PS; 2786defm MOVMSKPD : sse12_extr_sign_mask<VR128, int_x86_sse2_movmsk_pd, "movmskpd", 2787 SSEPackedDouble>, PD; 2788 2789def : Pat<(i32 (X86fgetsign FR32:$src)), 2790 (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128))>, 2791 Requires<[UseSSE1]>; 2792def : Pat<(i64 (X86fgetsign FR32:$src)), 2793 (SUBREG_TO_REG (i64 0), 2794 (MOVMSKPSrr (COPY_TO_REGCLASS FR32:$src, VR128)), sub_32bit)>, 2795 Requires<[UseSSE1]>; 2796def : Pat<(i32 (X86fgetsign FR64:$src)), 2797 (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128))>, 2798 Requires<[UseSSE2]>; 2799def : Pat<(i64 (X86fgetsign FR64:$src)), 2800 (SUBREG_TO_REG (i64 0), 2801 (MOVMSKPDrr (COPY_TO_REGCLASS FR64:$src, VR128)), sub_32bit)>, 2802 Requires<[UseSSE2]>; 2803 2804//===---------------------------------------------------------------------===// 2805// SSE2 - Packed Integer Logical Instructions 2806//===---------------------------------------------------------------------===// 2807 2808let ExeDomain = SSEPackedInt in { // SSE integer instructions 2809 2810/// PDI_binop_rm - Simple SSE2 binary operator. 2811multiclass PDI_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 2812 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 2813 X86MemOperand x86memop, OpndItins itins, 2814 bit IsCommutable, bit Is2Addr> { 2815 let isCommutable = IsCommutable in 2816 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 2817 (ins RC:$src1, RC:$src2), 2818 !if(Is2Addr, 2819 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2820 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2821 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, 2822 Sched<[itins.Sched]>; 2823 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 2824 (ins RC:$src1, x86memop:$src2), 2825 !if(Is2Addr, 2826 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 2827 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 2828 [(set RC:$dst, (OpVT (OpNode RC:$src1, 2829 (bitconvert (memop_frag addr:$src2)))))], 2830 itins.rm>, 2831 Sched<[itins.Sched.Folded, ReadAfterLd]>; 2832} 2833} // ExeDomain = SSEPackedInt 2834 2835multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode, 2836 ValueType OpVT128, ValueType OpVT256, 2837 OpndItins itins, bit IsCommutable = 0> { 2838let Predicates = [HasAVX, NoVLX] in 2839 defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128, 2840 VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V; 2841 2842let Constraints = "$src1 = $dst" in 2843 defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128, 2844 memopv2i64, i128mem, itins, IsCommutable, 1>; 2845 2846let Predicates = [HasAVX2, NoVLX] in 2847 defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, 2848 OpVT256, VR256, loadv4i64, i256mem, itins, 2849 IsCommutable, 0>, VEX_4V, VEX_L; 2850} 2851 2852// These are ordered here for pattern ordering requirements with the fp versions 2853 2854defm PAND : PDI_binop_all<0xDB, "pand", and, v2i64, v4i64, 2855 SSE_VEC_BIT_ITINS_P, 1>; 2856defm POR : PDI_binop_all<0xEB, "por", or, v2i64, v4i64, 2857 SSE_VEC_BIT_ITINS_P, 1>; 2858defm PXOR : PDI_binop_all<0xEF, "pxor", xor, v2i64, v4i64, 2859 SSE_VEC_BIT_ITINS_P, 1>; 2860defm PANDN : PDI_binop_all<0xDF, "pandn", X86andnp, v2i64, v4i64, 2861 SSE_VEC_BIT_ITINS_P, 0>; 2862 
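// Note: pandn is the one non-commutable op of the four because the
// complement applies only to the first operand: for each of the 128 (or
// 256) bits, dst = ~src1 & src2. Hence the IsCommutable = 0 above.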
2863//===----------------------------------------------------------------------===// 2864// SSE 1 & 2 - Logical Instructions 2865//===----------------------------------------------------------------------===// 2866 2867// Multiclass for scalars using the X86 logical operation aliases for FP. 2868multiclass sse12_fp_packed_scalar_logical_alias< 2869 bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { 2870 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 2871 FR32, f32, f128mem, loadf32_128, SSEPackedSingle, itins, 0>, 2872 PS, VEX_4V; 2873 2874 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 2875 FR64, f64, f128mem, loadf64_128, SSEPackedDouble, itins, 0>, 2876 PD, VEX_4V; 2877 2878 let Constraints = "$src1 = $dst" in { 2879 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, FR32, 2880 f32, f128mem, memopfsf32_128, SSEPackedSingle, itins>, PS; 2881 2882 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, FR64, 2883 f64, f128mem, memopfsf64_128, SSEPackedDouble, itins>, PD; 2884 } 2885} 2886 2887let isCodeGenOnly = 1 in { 2888 defm FsAND : sse12_fp_packed_scalar_logical_alias<0x54, "and", X86fand, 2889 SSE_BIT_ITINS_P>; 2890 defm FsOR : sse12_fp_packed_scalar_logical_alias<0x56, "or", X86for, 2891 SSE_BIT_ITINS_P>; 2892 defm FsXOR : sse12_fp_packed_scalar_logical_alias<0x57, "xor", X86fxor, 2893 SSE_BIT_ITINS_P>; 2894 2895 let isCommutable = 0 in 2896 defm FsANDN : sse12_fp_packed_scalar_logical_alias<0x55, "andn", X86fandn, 2897 SSE_BIT_ITINS_P>; 2898} 2899 2900// Multiclass for vectors using the X86 logical operation aliases for FP. 2901multiclass sse12_fp_packed_vector_logical_alias< 2902 bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { 2903 let Predicates = [HasAVX, NoVLX] in { 2904 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 2905 VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>, 2906 PS, VEX_4V; 2907 2908 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 2909 VR128, v2f64, f128mem, loadv2f64, SSEPackedDouble, itins, 0>, 2910 PD, VEX_4V; 2911 } 2912 2913 let Constraints = "$src1 = $dst" in { 2914 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 2915 v4f32, f128mem, memopv4f32, SSEPackedSingle, itins>, 2916 PS; 2917 2918 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 2919 v2f64, f128mem, memopv2f64, SSEPackedDouble, itins>, 2920 PD; 2921 } 2922} 2923 2924let isCodeGenOnly = 1 in { 2925 defm FvAND : sse12_fp_packed_vector_logical_alias<0x54, "and", X86fand, 2926 SSE_BIT_ITINS_P>; 2927 defm FvOR : sse12_fp_packed_vector_logical_alias<0x56, "or", X86for, 2928 SSE_BIT_ITINS_P>; 2929 defm FvXOR : sse12_fp_packed_vector_logical_alias<0x57, "xor", X86fxor, 2930 SSE_BIT_ITINS_P>; 2931 2932 let isCommutable = 0 in 2933 defm FvANDN : sse12_fp_packed_vector_logical_alias<0x55, "andn", X86fandn, 2934 SSE_BIT_ITINS_P>; 2935} 2936 2937/// sse12_fp_packed_logical - SSE 1 & 2 packed FP logical ops 2938/// 2939multiclass sse12_fp_packed_logical<bits<8> opc, string OpcodeStr, 2940 SDNode OpNode> { 2941 let Predicates = [HasAVX, NoVLX] in { 2942 defm V#NAME#PSY : sse12_fp_packed_logical_rm<opc, VR256, SSEPackedSingle, 2943 !strconcat(OpcodeStr, "ps"), f256mem, 2944 [(set VR256:$dst, (v4i64 (OpNode VR256:$src1, VR256:$src2)))], 2945 [(set VR256:$dst, (OpNode (bc_v4i64 (v8f32 VR256:$src1)), 2946 (loadv4i64 addr:$src2)))], 0>, PS, VEX_4V, VEX_L; 2947 2948 defm V#NAME#PDY : 
sse12_fp_packed_logical_rm<opc, VR256, SSEPackedDouble, 2949 !strconcat(OpcodeStr, "pd"), f256mem, 2950 [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2951 (bc_v4i64 (v4f64 VR256:$src2))))], 2952 [(set VR256:$dst, (OpNode (bc_v4i64 (v4f64 VR256:$src1)), 2953 (loadv4i64 addr:$src2)))], 0>, 2954 PD, VEX_4V, VEX_L; 2955 2956 // In AVX no need to add a pattern for 128-bit logical rr ps, because they 2957 // are all promoted to v2i64, and the patterns are covered by the int 2958 // version. This is needed in SSE only, because v2i64 isn't supported on 2959 // SSE1, but only on SSE2. 2960 defm V#NAME#PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2961 !strconcat(OpcodeStr, "ps"), f128mem, [], 2962 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2963 (loadv2i64 addr:$src2)))], 0>, PS, VEX_4V; 2964 2965 defm V#NAME#PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2966 !strconcat(OpcodeStr, "pd"), f128mem, 2967 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2968 (bc_v2i64 (v2f64 VR128:$src2))))], 2969 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2970 (loadv2i64 addr:$src2)))], 0>, 2971 PD, VEX_4V; 2972 } 2973 2974 let Constraints = "$src1 = $dst" in { 2975 defm PS : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedSingle, 2976 !strconcat(OpcodeStr, "ps"), f128mem, 2977 [(set VR128:$dst, (v2i64 (OpNode VR128:$src1, VR128:$src2)))], 2978 [(set VR128:$dst, (OpNode (bc_v2i64 (v4f32 VR128:$src1)), 2979 (memopv2i64 addr:$src2)))]>, PS; 2980 2981 defm PD : sse12_fp_packed_logical_rm<opc, VR128, SSEPackedDouble, 2982 !strconcat(OpcodeStr, "pd"), f128mem, 2983 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2984 (bc_v2i64 (v2f64 VR128:$src2))))], 2985 [(set VR128:$dst, (OpNode (bc_v2i64 (v2f64 VR128:$src1)), 2986 (memopv2i64 addr:$src2)))]>, PD; 2987 } 2988} 2989 2990defm AND : sse12_fp_packed_logical<0x54, "and", and>; 2991defm OR : sse12_fp_packed_logical<0x56, "or", or>; 2992defm XOR : sse12_fp_packed_logical<0x57, "xor", xor>; 2993let isCommutable = 0 in 2994 defm ANDN : sse12_fp_packed_logical<0x55, "andn", X86andnp>; 2995 2996// AVX1 requires type coercions in order to fold loads directly into logical 2997// operations. 2998let Predicates = [HasAVX1Only] in { 2999 def : Pat<(bc_v8f32 (and VR256:$src1, (loadv4i64 addr:$src2))), 3000 (VANDPSYrm VR256:$src1, addr:$src2)>; 3001 def : Pat<(bc_v8f32 (or VR256:$src1, (loadv4i64 addr:$src2))), 3002 (VORPSYrm VR256:$src1, addr:$src2)>; 3003 def : Pat<(bc_v8f32 (xor VR256:$src1, (loadv4i64 addr:$src2))), 3004 (VXORPSYrm VR256:$src1, addr:$src2)>; 3005 def : Pat<(bc_v8f32 (X86andnp VR256:$src1, (loadv4i64 addr:$src2))), 3006 (VANDNPSYrm VR256:$src1, addr:$src2)>; 3007} 3008 3009//===----------------------------------------------------------------------===// 3010// SSE 1 & 2 - Arithmetic Instructions 3011//===----------------------------------------------------------------------===// 3012 3013/// basic_sse12_fp_binop_xxx - SSE 1 & 2 binops come in both scalar and 3014/// vector forms. 3015/// 3016/// In addition, we also have a special variant of the scalar form here to 3017/// represent the associated intrinsic operation. This form is unlike the 3018/// plain scalar form, in that it takes an entire vector (instead of a scalar) 3019/// and leaves the top elements unmodified (therefore these cannot be commuted). 3020/// 3021/// These three forms can each be reg+reg or reg+mem. 
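/// For example, the intrinsic form of addss computes
///   { a[0] + b[0], a[1], a[2], a[3] }
/// so swapping the two operands would change the upper three elements.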
3022/// 3023 3024/// FIXME: once all 256-bit intrinsics are matched, cleanup and refactor those 3025/// classes below 3026multiclass basic_sse12_fp_binop_p<bits<8> opc, string OpcodeStr, 3027 SDNode OpNode, SizeItins itins> { 3028 let Predicates = [HasAVX, NoVLX] in { 3029 defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, 3030 VR128, v4f32, f128mem, loadv4f32, 3031 SSEPackedSingle, itins.s, 0>, PS, VEX_4V; 3032 defm V#NAME#PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, 3033 VR128, v2f64, f128mem, loadv2f64, 3034 SSEPackedDouble, itins.d, 0>, PD, VEX_4V; 3035 3036 defm V#NAME#PSY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), 3037 OpNode, VR256, v8f32, f256mem, loadv8f32, 3038 SSEPackedSingle, itins.s, 0>, PS, VEX_4V, VEX_L; 3039 defm V#NAME#PDY : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), 3040 OpNode, VR256, v4f64, f256mem, loadv4f64, 3041 SSEPackedDouble, itins.d, 0>, PD, VEX_4V, VEX_L; 3042 } 3043 3044 let Constraints = "$src1 = $dst" in { 3045 defm PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode, VR128, 3046 v4f32, f128mem, memopv4f32, SSEPackedSingle, 3047 itins.s>, PS; 3048 defm PD : sse12_fp_packed<opc, !strconcat(OpcodeStr, "pd"), OpNode, VR128, 3049 v2f64, f128mem, memopv2f64, SSEPackedDouble, 3050 itins.d>, PD; 3051 } 3052} 3053 3054multiclass basic_sse12_fp_binop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 3055 SizeItins itins> { 3056 defm V#NAME#SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 3057 OpNode, FR32, f32mem, itins.s, 0>, XS, VEX_4V, VEX_LIG; 3058 defm V#NAME#SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 3059 OpNode, FR64, f64mem, itins.d, 0>, XD, VEX_4V, VEX_LIG; 3060 3061 let Constraints = "$src1 = $dst" in { 3062 defm SS : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "ss"), 3063 OpNode, FR32, f32mem, itins.s>, XS; 3064 defm SD : sse12_fp_scalar<opc, !strconcat(OpcodeStr, "sd"), 3065 OpNode, FR64, f64mem, itins.d>, XD; 3066 } 3067} 3068 3069multiclass basic_sse12_fp_binop_s_int<bits<8> opc, string OpcodeStr, 3070 SizeItins itins> { 3071 defm V#NAME#SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3072 !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, 3073 itins.s, 0>, XS, VEX_4V, VEX_LIG; 3074 defm V#NAME#SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3075 !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, 3076 itins.d, 0>, XD, VEX_4V, VEX_LIG; 3077 3078 let Constraints = "$src1 = $dst" in { 3079 defm SS : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3080 !strconcat(OpcodeStr, "ss"), "", "_ss", ssmem, sse_load_f32, 3081 itins.s>, XS; 3082 defm SD : sse12_fp_scalar_int<opc, OpcodeStr, VR128, 3083 !strconcat(OpcodeStr, "sd"), "2", "_sd", sdmem, sse_load_f64, 3084 itins.d>, XD; 3085 } 3086} 3087 3088// Binary Arithmetic instructions 3089defm ADD : basic_sse12_fp_binop_p<0x58, "add", fadd, SSE_ALU_ITINS_P>, 3090 basic_sse12_fp_binop_s<0x58, "add", fadd, SSE_ALU_ITINS_S>, 3091 basic_sse12_fp_binop_s_int<0x58, "add", SSE_ALU_ITINS_S>; 3092defm MUL : basic_sse12_fp_binop_p<0x59, "mul", fmul, SSE_MUL_ITINS_P>, 3093 basic_sse12_fp_binop_s<0x59, "mul", fmul, SSE_MUL_ITINS_S>, 3094 basic_sse12_fp_binop_s_int<0x59, "mul", SSE_MUL_ITINS_S>; 3095let isCommutable = 0 in { 3096 defm SUB : basic_sse12_fp_binop_p<0x5C, "sub", fsub, SSE_ALU_ITINS_P>, 3097 basic_sse12_fp_binop_s<0x5C, "sub", fsub, SSE_ALU_ITINS_S>, 3098 basic_sse12_fp_binop_s_int<0x5C, "sub", SSE_ALU_ITINS_S>; 3099 defm DIV : basic_sse12_fp_binop_p<0x5E, "div", fdiv, SSE_DIV_ITINS_P>, 3100 
             basic_sse12_fp_binop_s<0x5E, "div", fdiv, SSE_DIV_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5E, "div", SSE_DIV_ITINS_S>;
  defm MAX : basic_sse12_fp_binop_p<0x5F, "max", X86fmax, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmax, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5F, "max", SSE_ALU_ITINS_S>;
  defm MIN : basic_sse12_fp_binop_p<0x5D, "min", X86fmin, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fmin, SSE_ALU_ITINS_S>,
             basic_sse12_fp_binop_s_int<0x5D, "min", SSE_ALU_ITINS_S>;
}

let isCodeGenOnly = 1 in {
  defm MAXC: basic_sse12_fp_binop_p<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5F, "max", X86fmaxc, SSE_ALU_ITINS_S>;
  defm MINC: basic_sse12_fp_binop_p<0x5D, "min", X86fminc, SSE_ALU_ITINS_P>,
             basic_sse12_fp_binop_s<0x5D, "min", X86fminc, SSE_ALU_ITINS_S>;
}

// Patterns used to select SSE scalar fp arithmetic instructions from
// either:
//
// (1) a scalar fp operation followed by a blend
//
// The effect is that the backend no longer emits unnecessary vector
// insert instructions immediately after SSE scalar fp instructions
// like addss or mulss.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     A[0] += B[0];
//     return A;
//   }
//
// Previously we generated:
//   addss %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0
//
// (2) a vector packed single/double fp operation followed by a vector insert
//
// The effect is that the backend converts the packed fp instruction
// followed by a vector insert into a single SSE scalar fp instruction.
//
// For example, given the following code:
//   __m128 foo(__m128 A, __m128 B) {
//     __m128 C = A + B;
//     return (__m128) {C[0], A[1], A[2], A[3]};
//   }
//
// Previously we generated:
//   addps %xmm0, %xmm1
//   movss %xmm1, %xmm0
//
// We now generate:
//   addss %xmm1, %xmm0

// TODO: Some canonicalization in lowering would simplify the number of
// patterns we have to try to match.
multiclass scalar_math_f32_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [UseSSE1] in {
    // extracted scalar math op with insert via movss
    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                      (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))))),
              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
               (COPY_TO_REGCLASS FR32:$src, VR128))>;

    // vector math op with insert via movss
    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                      (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
  }

  // With SSE 4.1, insertps/blendi are preferred to movss, so match those too.
  let Predicates = [UseSSE41] in {
    // extracted scalar math op with insert via insertps
    def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                      (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (iPTR 0))),
              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
               (COPY_TO_REGCLASS FR32:$src, VR128))>;

    // extracted scalar math op with insert via blend
    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                      (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst,
               (COPY_TO_REGCLASS FR32:$src, VR128))>;

    // vector math op with insert via blend
    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                      (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
              (!cast<I>(OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
  }

  // Repeat everything for AVX, except for the movss + scalar combo, since
  // that combination is not expected to occur with AVX codegen.
  let Predicates = [HasAVX] in {
    // extracted scalar math op with insert via insertps
    def : Pat<(v4f32 (X86insertps (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                      (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (iPTR 0))),
              (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
               (COPY_TO_REGCLASS FR32:$src, VR128))>;

    // extracted scalar math op with insert via blend
    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst), (v4f32 (scalar_to_vector
                      (Op (f32 (vector_extract (v4f32 VR128:$dst), (iPTR 0))),
                      FR32:$src))), (i8 1))),
              (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst,
               (COPY_TO_REGCLASS FR32:$src, VR128))>;

    // vector math op with insert via movss
    def : Pat<(v4f32 (X86Movss (v4f32 VR128:$dst),
                      (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)))),
              (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;

    // vector math op with insert via blend
    def : Pat<(v4f32 (X86Blendi (v4f32 VR128:$dst),
                      (Op (v4f32 VR128:$dst), (v4f32 VR128:$src)), (i8 1))),
              (!cast<I>("V"#OpcPrefix#SSrr_Int) v4f32:$dst, v4f32:$src)>;
  }
}

defm : scalar_math_f32_patterns<fadd, "ADD">;
defm : scalar_math_f32_patterns<fsub, "SUB">;
defm : scalar_math_f32_patterns<fmul, "MUL">;
defm : scalar_math_f32_patterns<fdiv, "DIV">;

multiclass scalar_math_f64_patterns<SDNode Op, string OpcPrefix> {
  let Predicates = [UseSSE2] in {
    // extracted scalar math op with insert via movsd
    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector
                      (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))),
                      FR64:$src))))),
              (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst,
               (COPY_TO_REGCLASS FR64:$src, VR128))>;

    // vector math op with insert via movsd
    def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst),
                      (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))),
              (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>;
  }

  // With SSE 4.1, blendi is preferred to movsd, so match that too.
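  // (No insertps form here: insertps operates only on single-precision
  // elements, so f64 merges are expressed via movsd or blendi instead.)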
3247 let Predicates = [UseSSE41] in { 3248 // extracted scalar math op with insert via blend 3249 def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector 3250 (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), 3251 FR64:$src))), (i8 1))), 3252 (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, 3253 (COPY_TO_REGCLASS FR64:$src, VR128))>; 3254 3255 // vector math op with insert via blend 3256 def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), 3257 (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))), 3258 (!cast<I>(OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; 3259 } 3260 3261 // Repeat everything for AVX. 3262 let Predicates = [HasAVX] in { 3263 // extracted scalar math op with insert via movsd 3264 def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), (v2f64 (scalar_to_vector 3265 (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), 3266 FR64:$src))))), 3267 (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, 3268 (COPY_TO_REGCLASS FR64:$src, VR128))>; 3269 3270 // extracted scalar math op with insert via blend 3271 def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), (v2f64 (scalar_to_vector 3272 (Op (f64 (vector_extract (v2f64 VR128:$dst), (iPTR 0))), 3273 FR64:$src))), (i8 1))), 3274 (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, 3275 (COPY_TO_REGCLASS FR64:$src, VR128))>; 3276 3277 // vector math op with insert via movsd 3278 def : Pat<(v2f64 (X86Movsd (v2f64 VR128:$dst), 3279 (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)))), 3280 (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; 3281 3282 // vector math op with insert via blend 3283 def : Pat<(v2f64 (X86Blendi (v2f64 VR128:$dst), 3284 (Op (v2f64 VR128:$dst), (v2f64 VR128:$src)), (i8 1))), 3285 (!cast<I>("V"#OpcPrefix#SDrr_Int) v2f64:$dst, v2f64:$src)>; 3286 } 3287} 3288 3289defm : scalar_math_f64_patterns<fadd, "ADD">; 3290defm : scalar_math_f64_patterns<fsub, "SUB">; 3291defm : scalar_math_f64_patterns<fmul, "MUL">; 3292defm : scalar_math_f64_patterns<fdiv, "DIV">; 3293 3294 3295/// Unop Arithmetic 3296/// In addition, we also have a special variant of the scalar form here to 3297/// represent the associated intrinsic operation. This form is unlike the 3298/// plain scalar form, in that it takes an entire vector (instead of a 3299/// scalar) and leaves the top elements undefined. 3300/// 3301/// And, we have a special variant form for a full-vector intrinsic form. 3302 3303let Sched = WriteFSqrt in { 3304def SSE_SQRTPS : OpndItins< 3305 IIC_SSE_SQRTPS_RR, IIC_SSE_SQRTPS_RM 3306>; 3307 3308def SSE_SQRTSS : OpndItins< 3309 IIC_SSE_SQRTSS_RR, IIC_SSE_SQRTSS_RM 3310>; 3311 3312def SSE_SQRTPD : OpndItins< 3313 IIC_SSE_SQRTPD_RR, IIC_SSE_SQRTPD_RM 3314>; 3315 3316def SSE_SQRTSD : OpndItins< 3317 IIC_SSE_SQRTSD_RR, IIC_SSE_SQRTSD_RM 3318>; 3319} 3320 3321let Sched = WriteFRsqrt in { 3322def SSE_RSQRTPS : OpndItins< 3323 IIC_SSE_RSQRTPS_RR, IIC_SSE_RSQRTPS_RM 3324>; 3325 3326def SSE_RSQRTSS : OpndItins< 3327 IIC_SSE_RSQRTSS_RR, IIC_SSE_RSQRTSS_RM 3328>; 3329} 3330 3331let Sched = WriteFRcp in { 3332def SSE_RCPP : OpndItins< 3333 IIC_SSE_RCPP_RR, IIC_SSE_RCPP_RM 3334>; 3335 3336def SSE_RCPS : OpndItins< 3337 IIC_SSE_RCPS_RR, IIC_SSE_RCPS_RM 3338>; 3339} 3340 3341/// sse_fp_unop_s - SSE1 unops in scalar form 3342/// For the non-AVX defs, we need $src1 to be tied to $dst because 3343/// the HW instructions are 2 operand / destructive. 
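/// For example (illustrative asm): "sqrtss %xmm1, %xmm0" writes
/// sqrt(xmm1[0]) into xmm0[0] but must leave xmm0's upper elements intact,
/// so the destination is also a source; AVX's three-operand
/// "vsqrtss %xmm2, %xmm1, %xmm0" instead takes the upper elements from
/// %xmm1 explicitly.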
3344multiclass sse_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 3345 ValueType vt, ValueType ScalarVT, 3346 X86MemOperand x86memop, Operand vec_memop, 3347 ComplexPattern mem_cpat, Intrinsic Intr, 3348 SDNode OpNode, OpndItins itins, Predicate target, 3349 string Suffix> { 3350 let hasSideEffects = 0 in { 3351 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1), 3352 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 3353 [(set RC:$dst, (OpNode RC:$src1))], itins.rr>, Sched<[itins.Sched]>, 3354 Requires<[target]>; 3355 let mayLoad = 1 in 3356 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1), 3357 !strconcat(OpcodeStr, "\t{$src1, $dst|$dst, $src1}"), 3358 [(set RC:$dst, (OpNode (load addr:$src1)))], itins.rm>, 3359 Sched<[itins.Sched.Folded, ReadAfterLd]>, 3360 Requires<[target, OptForSize]>; 3361 3362 let isCodeGenOnly = 1, Constraints = "$src1 = $dst" in { 3363 def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), 3364 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3365 []>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3366 let mayLoad = 1 in 3367 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, vec_memop:$src2), 3368 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3369 []>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3370 } 3371 } 3372 3373 let Predicates = [target] in { 3374 def : Pat<(vt (OpNode mem_cpat:$src)), 3375 (vt (COPY_TO_REGCLASS (vt (!cast<Instruction>(NAME#Suffix##m_Int) 3376 (vt (IMPLICIT_DEF)), mem_cpat:$src)), RC))>; 3377 // These are unary operations, but they are modeled as having 2 source operands 3378 // because the high elements of the destination are unchanged in SSE. 3379 def : Pat<(Intr VR128:$src), 3380 (!cast<Instruction>(NAME#Suffix##r_Int) VR128:$src, VR128:$src)>; 3381 def : Pat<(Intr (load addr:$src)), 3382 (vt (COPY_TO_REGCLASS(!cast<Instruction>(NAME#Suffix##m) 3383 addr:$src), VR128))>; 3384 def : Pat<(Intr mem_cpat:$src), 3385 (!cast<Instruction>(NAME#Suffix##m_Int) 3386 (vt (IMPLICIT_DEF)), mem_cpat:$src)>; 3387 } 3388} 3389 3390multiclass avx_fp_unop_s<bits<8> opc, string OpcodeStr, RegisterClass RC, 3391 ValueType vt, ValueType ScalarVT, 3392 X86MemOperand x86memop, Operand vec_memop, 3393 ComplexPattern mem_cpat, 3394 Intrinsic Intr, SDNode OpNode, OpndItins itins, 3395 Predicate target, string Suffix> { 3396 let hasSideEffects = 0 in { 3397 def r : I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 3398 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3399 [], itins.rr>, Sched<[itins.Sched]>; 3400 let mayLoad = 1 in 3401 def m : I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 3402 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3403 [], itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3404 let isCodeGenOnly = 1 in { 3405 // todo: uncomment when all r_Int forms will be added to X86InstrInfo.cpp 3406 //def r_Int : I<opc, MRMSrcReg, (outs VR128:$dst), 3407 // (ins VR128:$src1, VR128:$src2), 3408 // !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3409 // []>, Sched<[itins.Sched.Folded]>; 3410 let mayLoad = 1 in 3411 def m_Int : I<opc, MRMSrcMem, (outs VR128:$dst), 3412 (ins VR128:$src1, vec_memop:$src2), 3413 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 3414 []>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3415 } 3416 } 3417 3418 let Predicates = [target] in { 3419 def : Pat<(OpNode RC:$src), (!cast<Instruction>("V"#NAME#Suffix##r) 3420 (ScalarVT 
(IMPLICIT_DEF)), RC:$src)>; 3421 3422 def : Pat<(vt (OpNode mem_cpat:$src)), 3423 (!cast<Instruction>("V"#NAME#Suffix##m_Int) (vt (IMPLICIT_DEF)), 3424 mem_cpat:$src)>; 3425 3426 // todo: use r_Int form when it will be ready 3427 //def : Pat<(Intr VR128:$src), (!cast<Instruction>("V"#NAME#Suffix##r_Int) 3428 // (VT (IMPLICIT_DEF)), VR128:$src)>; 3429 def : Pat<(Intr VR128:$src), 3430 (vt (COPY_TO_REGCLASS( 3431 !cast<Instruction>("V"#NAME#Suffix##r) (ScalarVT (IMPLICIT_DEF)), 3432 (ScalarVT (COPY_TO_REGCLASS VR128:$src, RC))), VR128))>; 3433 def : Pat<(Intr mem_cpat:$src), 3434 (!cast<Instruction>("V"#NAME#Suffix##m_Int) 3435 (vt (IMPLICIT_DEF)), mem_cpat:$src)>; 3436 } 3437 let Predicates = [target, OptForSize] in 3438 def : Pat<(ScalarVT (OpNode (load addr:$src))), 3439 (!cast<Instruction>("V"#NAME#Suffix##m) (ScalarVT (IMPLICIT_DEF)), 3440 addr:$src)>; 3441} 3442 3443/// sse1_fp_unop_p - SSE1 unops in packed form. 3444multiclass sse1_fp_unop_p<bits<8> opc, string OpcodeStr, SDNode OpNode, 3445 OpndItins itins> { 3446let Predicates = [HasAVX] in { 3447 def V#NAME#PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3448 !strconcat("v", OpcodeStr, 3449 "ps\t{$src, $dst|$dst, $src}"), 3450 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], 3451 itins.rr>, VEX, Sched<[itins.Sched]>; 3452 def V#NAME#PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3453 !strconcat("v", OpcodeStr, 3454 "ps\t{$src, $dst|$dst, $src}"), 3455 [(set VR128:$dst, (OpNode (loadv4f32 addr:$src)))], 3456 itins.rm>, VEX, Sched<[itins.Sched.Folded]>; 3457 def V#NAME#PSYr : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 3458 !strconcat("v", OpcodeStr, 3459 "ps\t{$src, $dst|$dst, $src}"), 3460 [(set VR256:$dst, (v8f32 (OpNode VR256:$src)))], 3461 itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; 3462 def V#NAME#PSYm : PSI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 3463 !strconcat("v", OpcodeStr, 3464 "ps\t{$src, $dst|$dst, $src}"), 3465 [(set VR256:$dst, (OpNode (loadv8f32 addr:$src)))], 3466 itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; 3467} 3468 3469 def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3470 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 3471 [(set VR128:$dst, (v4f32 (OpNode VR128:$src)))], itins.rr>, 3472 Sched<[itins.Sched]>; 3473 def PSm : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3474 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 3475 [(set VR128:$dst, (OpNode (memopv4f32 addr:$src)))], itins.rm>, 3476 Sched<[itins.Sched.Folded]>; 3477} 3478 3479/// sse1_fp_unop_p_int - SSE1 intrinsics unops in packed forms. 
3480multiclass sse1_fp_unop_p_int<bits<8> opc, string OpcodeStr, 3481 Intrinsic V4F32Int, Intrinsic V8F32Int, 3482 OpndItins itins> { 3483let isCodeGenOnly = 1 in { 3484let Predicates = [HasAVX] in { 3485 def V#NAME#PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3486 !strconcat("v", OpcodeStr, 3487 "ps\t{$src, $dst|$dst, $src}"), 3488 [(set VR128:$dst, (V4F32Int VR128:$src))], 3489 itins.rr>, VEX, Sched<[itins.Sched]>; 3490 def V#NAME#PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3491 !strconcat("v", OpcodeStr, 3492 "ps\t{$src, $dst|$dst, $src}"), 3493 [(set VR128:$dst, (V4F32Int (loadv4f32 addr:$src)))], 3494 itins.rm>, VEX, Sched<[itins.Sched.Folded]>; 3495 def V#NAME#PSYr_Int : PSI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 3496 !strconcat("v", OpcodeStr, 3497 "ps\t{$src, $dst|$dst, $src}"), 3498 [(set VR256:$dst, (V8F32Int VR256:$src))], 3499 itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; 3500 def V#NAME#PSYm_Int : PSI<opc, MRMSrcMem, (outs VR256:$dst), 3501 (ins f256mem:$src), 3502 !strconcat("v", OpcodeStr, 3503 "ps\t{$src, $dst|$dst, $src}"), 3504 [(set VR256:$dst, (V8F32Int (loadv8f32 addr:$src)))], 3505 itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; 3506} 3507 3508 def PSr_Int : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3509 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 3510 [(set VR128:$dst, (V4F32Int VR128:$src))], 3511 itins.rr>, Sched<[itins.Sched]>; 3512 def PSm_Int : PSI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3513 !strconcat(OpcodeStr, "ps\t{$src, $dst|$dst, $src}"), 3514 [(set VR128:$dst, (V4F32Int (memopv4f32 addr:$src)))], 3515 itins.rm>, Sched<[itins.Sched.Folded]>; 3516} // isCodeGenOnly = 1 3517} 3518 3519/// sse2_fp_unop_p - SSE2 unops in vector forms. 
3520multiclass sse2_fp_unop_p<bits<8> opc, string OpcodeStr, 3521 SDNode OpNode, OpndItins itins> { 3522let Predicates = [HasAVX] in { 3523 def V#NAME#PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3524 !strconcat("v", OpcodeStr, 3525 "pd\t{$src, $dst|$dst, $src}"), 3526 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], 3527 itins.rr>, VEX, Sched<[itins.Sched]>; 3528 def V#NAME#PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3529 !strconcat("v", OpcodeStr, 3530 "pd\t{$src, $dst|$dst, $src}"), 3531 [(set VR128:$dst, (OpNode (loadv2f64 addr:$src)))], 3532 itins.rm>, VEX, Sched<[itins.Sched.Folded]>; 3533 def V#NAME#PDYr : PDI<opc, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src), 3534 !strconcat("v", OpcodeStr, 3535 "pd\t{$src, $dst|$dst, $src}"), 3536 [(set VR256:$dst, (v4f64 (OpNode VR256:$src)))], 3537 itins.rr>, VEX, VEX_L, Sched<[itins.Sched]>; 3538 def V#NAME#PDYm : PDI<opc, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src), 3539 !strconcat("v", OpcodeStr, 3540 "pd\t{$src, $dst|$dst, $src}"), 3541 [(set VR256:$dst, (OpNode (loadv4f64 addr:$src)))], 3542 itins.rm>, VEX, VEX_L, Sched<[itins.Sched.Folded]>; 3543} 3544 3545 def PDr : PDI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3546 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 3547 [(set VR128:$dst, (v2f64 (OpNode VR128:$src)))], itins.rr>, 3548 Sched<[itins.Sched]>; 3549 def PDm : PDI<opc, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src), 3550 !strconcat(OpcodeStr, "pd\t{$src, $dst|$dst, $src}"), 3551 [(set VR128:$dst, (OpNode (memopv2f64 addr:$src)))], itins.rm>, 3552 Sched<[itins.Sched.Folded]>; 3553} 3554 3555multiclass sse1_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 3556 OpndItins itins> { 3557 defm SS : sse_fp_unop_s<opc, OpcodeStr##ss, FR32, v4f32, f32, f32mem, 3558 ssmem, sse_load_f32, 3559 !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode, 3560 itins, UseSSE1, "SS">, XS; 3561 defm V#NAME#SS : avx_fp_unop_s<opc, "v"#OpcodeStr##ss, FR32, v4f32, f32, 3562 f32mem, ssmem, sse_load_f32, 3563 !cast<Intrinsic>("int_x86_sse_"##OpcodeStr##_ss), OpNode, 3564 itins, UseAVX, "SS">, XS, VEX_4V, VEX_LIG; 3565} 3566 3567multiclass sse2_fp_unop_s<bits<8> opc, string OpcodeStr, SDNode OpNode, 3568 OpndItins itins> { 3569 defm SD : sse_fp_unop_s<opc, OpcodeStr##sd, FR64, v2f64, f64, f64mem, 3570 sdmem, sse_load_f64, 3571 !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd), 3572 OpNode, itins, UseSSE2, "SD">, XD; 3573 defm V#NAME#SD : avx_fp_unop_s<opc, "v"#OpcodeStr##sd, FR64, v2f64, f64, 3574 f64mem, sdmem, sse_load_f64, 3575 !cast<Intrinsic>("int_x86_sse2_"##OpcodeStr##_sd), 3576 OpNode, itins, UseAVX, "SD">, XD, VEX_4V, VEX_LIG; 3577} 3578 3579// Square root. 3580defm SQRT : sse1_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSS>, 3581 sse1_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPS>, 3582 sse2_fp_unop_s<0x51, "sqrt", fsqrt, SSE_SQRTSD>, 3583 sse2_fp_unop_p<0x51, "sqrt", fsqrt, SSE_SQRTPD>; 3584 3585// Reciprocal approximations. Note that these typically require refinement 3586// in order to obtain suitable precision. 
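// For reference, one Newton-Raphson step per estimate is the usual
// refinement (the step itself is emitted during lowering, not by these
// patterns); given estimate x0 for input a:
//   rcpps:   x1 = x0 * (2.0 - a * x0)
//   rsqrtps: x1 = x0 * (1.5 - 0.5 * a * x0 * x0)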
3587defm RSQRT : sse1_fp_unop_s<0x52, "rsqrt", X86frsqrt, SSE_RSQRTSS>, 3588 sse1_fp_unop_p<0x52, "rsqrt", X86frsqrt, SSE_RSQRTPS>, 3589 sse1_fp_unop_p_int<0x52, "rsqrt", int_x86_sse_rsqrt_ps, 3590 int_x86_avx_rsqrt_ps_256, SSE_RSQRTPS>; 3591defm RCP : sse1_fp_unop_s<0x53, "rcp", X86frcp, SSE_RCPS>, 3592 sse1_fp_unop_p<0x53, "rcp", X86frcp, SSE_RCPP>, 3593 sse1_fp_unop_p_int<0x53, "rcp", int_x86_sse_rcp_ps, 3594 int_x86_avx_rcp_ps_256, SSE_RCPP>; 3595 3596// There is no f64 version of the reciprocal approximation instructions. 3597 3598//===----------------------------------------------------------------------===// 3599// SSE 1 & 2 - Non-temporal stores 3600//===----------------------------------------------------------------------===// 3601 3602let AddedComplexity = 400 in { // Prefer non-temporal versions 3603let SchedRW = [WriteStore] in { 3604let Predicates = [HasAVX, NoVLX] in { 3605def VMOVNTPSmr : VPSI<0x2B, MRMDestMem, (outs), 3606 (ins f128mem:$dst, VR128:$src), 3607 "movntps\t{$src, $dst|$dst, $src}", 3608 [(alignednontemporalstore (v4f32 VR128:$src), 3609 addr:$dst)], 3610 IIC_SSE_MOVNT>, VEX; 3611def VMOVNTPDmr : VPDI<0x2B, MRMDestMem, (outs), 3612 (ins f128mem:$dst, VR128:$src), 3613 "movntpd\t{$src, $dst|$dst, $src}", 3614 [(alignednontemporalstore (v2f64 VR128:$src), 3615 addr:$dst)], 3616 IIC_SSE_MOVNT>, VEX; 3617 3618let ExeDomain = SSEPackedInt in 3619def VMOVNTDQmr : VPDI<0xE7, MRMDestMem, (outs), 3620 (ins f128mem:$dst, VR128:$src), 3621 "movntdq\t{$src, $dst|$dst, $src}", 3622 [(alignednontemporalstore (v2i64 VR128:$src), 3623 addr:$dst)], 3624 IIC_SSE_MOVNT>, VEX; 3625 3626def VMOVNTPSYmr : VPSI<0x2B, MRMDestMem, (outs), 3627 (ins f256mem:$dst, VR256:$src), 3628 "movntps\t{$src, $dst|$dst, $src}", 3629 [(alignednontemporalstore (v8f32 VR256:$src), 3630 addr:$dst)], 3631 IIC_SSE_MOVNT>, VEX, VEX_L; 3632def VMOVNTPDYmr : VPDI<0x2B, MRMDestMem, (outs), 3633 (ins f256mem:$dst, VR256:$src), 3634 "movntpd\t{$src, $dst|$dst, $src}", 3635 [(alignednontemporalstore (v4f64 VR256:$src), 3636 addr:$dst)], 3637 IIC_SSE_MOVNT>, VEX, VEX_L; 3638let ExeDomain = SSEPackedInt in 3639def VMOVNTDQYmr : VPDI<0xE7, MRMDestMem, (outs), 3640 (ins f256mem:$dst, VR256:$src), 3641 "movntdq\t{$src, $dst|$dst, $src}", 3642 [(alignednontemporalstore (v4i64 VR256:$src), 3643 addr:$dst)], 3644 IIC_SSE_MOVNT>, VEX, VEX_L; 3645} 3646 3647def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3648 "movntps\t{$src, $dst|$dst, $src}", 3649 [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)], 3650 IIC_SSE_MOVNT>; 3651def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3652 "movntpd\t{$src, $dst|$dst, $src}", 3653 [(alignednontemporalstore(v2f64 VR128:$src), addr:$dst)], 3654 IIC_SSE_MOVNT>; 3655 3656let ExeDomain = SSEPackedInt in 3657def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src), 3658 "movntdq\t{$src, $dst|$dst, $src}", 3659 [(alignednontemporalstore (v2i64 VR128:$src), addr:$dst)], 3660 IIC_SSE_MOVNT>; 3661 3662// There is no AVX form for instructions below this point 3663def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src), 3664 "movnti{l}\t{$src, $dst|$dst, $src}", 3665 [(nontemporalstore (i32 GR32:$src), addr:$dst)], 3666 IIC_SSE_MOVNT>, 3667 PS, Requires<[HasSSE2]>; 3668def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src), 3669 "movnti{q}\t{$src, $dst|$dst, $src}", 3670 [(nontemporalstore (i64 GR64:$src), addr:$dst)], 3671 IIC_SSE_MOVNT>, 3672 PS, Requires<[HasSSE2]>; 3673} 
// SchedRW = [WriteStore] 3674 3675let Predicates = [HasAVX2, NoVLX] in { 3676 def : Pat<(alignednontemporalstore (v8i32 VR256:$src), addr:$dst), 3677 (VMOVNTDQYmr addr:$dst, VR256:$src)>; 3678 def : Pat<(alignednontemporalstore (v16i16 VR256:$src), addr:$dst), 3679 (VMOVNTDQYmr addr:$dst, VR256:$src)>; 3680 def : Pat<(alignednontemporalstore (v32i8 VR256:$src), addr:$dst), 3681 (VMOVNTDQYmr addr:$dst, VR256:$src)>; 3682} 3683 3684let Predicates = [HasAVX, NoVLX] in { 3685 def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), 3686 (VMOVNTDQmr addr:$dst, VR128:$src)>; 3687 def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), 3688 (VMOVNTDQmr addr:$dst, VR128:$src)>; 3689 def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), 3690 (VMOVNTDQmr addr:$dst, VR128:$src)>; 3691} 3692 3693def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst), 3694 (MOVNTDQmr addr:$dst, VR128:$src)>; 3695def : Pat<(alignednontemporalstore (v8i16 VR128:$src), addr:$dst), 3696 (MOVNTDQmr addr:$dst, VR128:$src)>; 3697def : Pat<(alignednontemporalstore (v16i8 VR128:$src), addr:$dst), 3698 (MOVNTDQmr addr:$dst, VR128:$src)>; 3699 3700} // AddedComplexity 3701 3702//===----------------------------------------------------------------------===// 3703// SSE 1 & 2 - Prefetch and memory fence 3704//===----------------------------------------------------------------------===// 3705 3706// Prefetch intrinsic. 3707let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { 3708def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), 3709 "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], 3710 IIC_SSE_PREFETCH>, TB; 3711def PREFETCHT1 : I<0x18, MRM2m, (outs), (ins i8mem:$src), 3712 "prefetcht1\t$src", [(prefetch addr:$src, imm, (i32 2), (i32 1))], 3713 IIC_SSE_PREFETCH>, TB; 3714def PREFETCHT2 : I<0x18, MRM3m, (outs), (ins i8mem:$src), 3715 "prefetcht2\t$src", [(prefetch addr:$src, imm, (i32 1), (i32 1))], 3716 IIC_SSE_PREFETCH>, TB; 3717def PREFETCHNTA : I<0x18, MRM0m, (outs), (ins i8mem:$src), 3718 "prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0), (i32 1))], 3719 IIC_SSE_PREFETCH>, TB; 3720} 3721 3722// FIXME: How should flush instruction be modeled? 3723let SchedRW = [WriteLoad] in { 3724// Flush cache 3725def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src), 3726 "clflush\t$src", [(int_x86_sse2_clflush addr:$src)], 3727 IIC_SSE_PREFETCH>, PS, Requires<[HasSSE2]>; 3728} 3729 3730let SchedRW = [WriteNop] in { 3731// Pause. This "instruction" is encoded as "rep; nop", so even though it 3732// was introduced with SSE2, it's backward compatible. 
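// ("rep; nop" is the two-byte sequence 0xF3 0x90; pre-SSE2 processors
// simply ignore the redundant REP prefix and execute a plain nop.)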
def PAUSE : I<0x90, RawFrm, (outs), (ins),
              "pause", [(int_x86_sse2_pause)], IIC_SSE_PAUSE>,
              OBXS, Requires<[HasSSE2]>;
}

let SchedRW = [WriteFence] in {
// Load, store, and memory fence
def SFENCE : I<0xAE, MRM_F8, (outs), (ins),
               "sfence", [(int_x86_sse_sfence)], IIC_SSE_SFENCE>,
               PS, Requires<[HasSSE1]>;
def LFENCE : I<0xAE, MRM_E8, (outs), (ins),
               "lfence", [(int_x86_sse2_lfence)], IIC_SSE_LFENCE>,
               TB, Requires<[HasSSE2]>;
def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
               "mfence", [(int_x86_sse2_mfence)], IIC_SSE_MFENCE>,
               TB, Requires<[HasSSE2]>;
} // SchedRW

def : Pat<(X86SFence), (SFENCE)>;
def : Pat<(X86LFence), (LFENCE)>;
def : Pat<(X86MFence), (MFENCE)>;

//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Load/Store MXCSR register
//===----------------------------------------------------------------------===//

def VLDMXCSR : VPSI<0xAE, MRM2m, (outs), (ins i32mem:$src),
                    "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                    IIC_SSE_LDMXCSR>, VEX, Sched<[WriteLoad]>;
def VSTMXCSR : VPSI<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                    "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                    IIC_SSE_STMXCSR>, VEX, Sched<[WriteStore]>;

let Predicates = [UseSSE1] in {
def LDMXCSR : I<0xAE, MRM2m, (outs), (ins i32mem:$src),
                "ldmxcsr\t$src", [(int_x86_sse_ldmxcsr addr:$src)],
                IIC_SSE_LDMXCSR>, TB, Sched<[WriteLoad]>;
def STMXCSR : I<0xAE, MRM3m, (outs), (ins i32mem:$dst),
                "stmxcsr\t$dst", [(int_x86_sse_stmxcsr addr:$dst)],
                IIC_SSE_STMXCSR>, TB, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// SSE2 - Move Aligned/Unaligned Packed Integer Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in { // SSE integer instructions

let hasSideEffects = 0, SchedRW = [WriteMove] in {
def VMOVDQArr  : VPDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX;
def VMOVDQAYrr : VPDI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>,
                      VEX, VEX_L;
def VMOVDQUrr  : VSSI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX;
def VMOVDQUYrr : VSSI<0x6F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                      "movdqu\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVU_P_RR>,
                      VEX, VEX_L;
}

// For Disassembler
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0,
    SchedRW = [WriteMove] in {
def VMOVDQArr_REV  : VPDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>,
                          VEX;
def VMOVDQAYrr_REV : VPDI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqa\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVA_P_RR>, VEX, VEX_L;
def VMOVDQUrr_REV  : VSSI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>,
                          VEX;
def VMOVDQUYrr_REV : VSSI<0x7F, MRMDestReg, (outs VR256:$dst), (ins VR256:$src),
                          "movdqu\t{$src, $dst|$dst, $src}", [],
                          IIC_SSE_MOVU_P_RR>, VEX, VEX_L;
}

let canFoldAsLoad = 1, mayLoad = 1,
isReMaterializable = 1, 3816 hasSideEffects = 0, SchedRW = [WriteLoad] in { 3817def VMOVDQArm : VPDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3818 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, 3819 VEX; 3820def VMOVDQAYrm : VPDI<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3821 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RM>, 3822 VEX, VEX_L; 3823let Predicates = [HasAVX] in { 3824 def VMOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3825 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, 3826 XS, VEX; 3827 def VMOVDQUYrm : I<0x6F, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 3828 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_RM>, 3829 XS, VEX, VEX_L; 3830} 3831} 3832 3833let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in { 3834def VMOVDQAmr : VPDI<0x7F, MRMDestMem, (outs), 3835 (ins i128mem:$dst, VR128:$src), 3836 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, 3837 VEX; 3838def VMOVDQAYmr : VPDI<0x7F, MRMDestMem, (outs), 3839 (ins i256mem:$dst, VR256:$src), 3840 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_MR>, 3841 VEX, VEX_L; 3842let Predicates = [HasAVX] in { 3843def VMOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3844 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, 3845 XS, VEX; 3846def VMOVDQUYmr : I<0x7F, MRMDestMem, (outs), (ins i256mem:$dst, VR256:$src), 3847 "vmovdqu\t{$src, $dst|$dst, $src}",[], IIC_SSE_MOVU_P_MR>, 3848 XS, VEX, VEX_L; 3849} 3850} 3851 3852let SchedRW = [WriteMove] in { 3853let hasSideEffects = 0 in 3854def MOVDQArr : PDI<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3855 "movdqa\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVA_P_RR>; 3856 3857def MOVDQUrr : I<0x6F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 3858 "movdqu\t{$src, $dst|$dst, $src}", 3859 [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; 3860 3861// For Disassembler 3862let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { 3863def MOVDQArr_REV : PDI<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3864 "movdqa\t{$src, $dst|$dst, $src}", [], 3865 IIC_SSE_MOVA_P_RR>; 3866 3867def MOVDQUrr_REV : I<0x7F, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 3868 "movdqu\t{$src, $dst|$dst, $src}", 3869 [], IIC_SSE_MOVU_P_RR>, XS, Requires<[UseSSE2]>; 3870} 3871} // SchedRW 3872 3873let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1, 3874 hasSideEffects = 0, SchedRW = [WriteLoad] in { 3875def MOVDQArm : PDI<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3876 "movdqa\t{$src, $dst|$dst, $src}", 3877 [/*(set VR128:$dst, (alignedloadv2i64 addr:$src))*/], 3878 IIC_SSE_MOVA_P_RM>; 3879def MOVDQUrm : I<0x6F, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 3880 "movdqu\t{$src, $dst|$dst, $src}", 3881 [/*(set VR128:$dst, (loadv2i64 addr:$src))*/], 3882 IIC_SSE_MOVU_P_RM>, 3883 XS, Requires<[UseSSE2]>; 3884} 3885 3886let mayStore = 1, hasSideEffects = 0, SchedRW = [WriteStore] in { 3887def MOVDQAmr : PDI<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3888 "movdqa\t{$src, $dst|$dst, $src}", 3889 [/*(alignedstore (v2i64 VR128:$src), addr:$dst)*/], 3890 IIC_SSE_MOVA_P_MR>; 3891def MOVDQUmr : I<0x7F, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src), 3892 "movdqu\t{$src, $dst|$dst, $src}", 3893 [/*(store (v2i64 VR128:$src), addr:$dst)*/], 3894 IIC_SSE_MOVU_P_MR>, 3895 XS, Requires<[UseSSE2]>; 3896} 3897 3898} // ExeDomain = SSEPackedInt 3899 3900let Predicates = [HasAVX] in { 3901 def : 
Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), 3902 (VMOVDQUmr addr:$dst, VR128:$src)>; 3903 def : Pat<(int_x86_avx_storeu_dq_256 addr:$dst, VR256:$src), 3904 (VMOVDQUYmr addr:$dst, VR256:$src)>; 3905} 3906let Predicates = [UseSSE2] in 3907def : Pat<(int_x86_sse2_storeu_dq addr:$dst, VR128:$src), 3908 (MOVDQUmr addr:$dst, VR128:$src)>; 3909 3910//===---------------------------------------------------------------------===// 3911// SSE2 - Packed Integer Arithmetic Instructions 3912//===---------------------------------------------------------------------===// 3913 3914let Sched = WriteVecIMul in 3915def SSE_PMADD : OpndItins< 3916 IIC_SSE_PMADD, IIC_SSE_PMADD 3917>; 3918 3919let ExeDomain = SSEPackedInt in { // SSE integer instructions 3920 3921multiclass PDI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId, 3922 RegisterClass RC, PatFrag memop_frag, 3923 X86MemOperand x86memop, 3924 OpndItins itins, 3925 bit IsCommutable = 0, 3926 bit Is2Addr = 1> { 3927 let isCommutable = IsCommutable in 3928 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3929 (ins RC:$src1, RC:$src2), 3930 !if(Is2Addr, 3931 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3932 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3933 [(set RC:$dst, (IntId RC:$src1, RC:$src2))], itins.rr>, 3934 Sched<[itins.Sched]>; 3935 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3936 (ins RC:$src1, x86memop:$src2), 3937 !if(Is2Addr, 3938 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3939 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3940 [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))], 3941 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 3942} 3943 3944multiclass PDI_binop_all_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, 3945 Intrinsic IntId256, OpndItins itins, 3946 bit IsCommutable = 0> { 3947let Predicates = [HasAVX] in 3948 defm V#NAME : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId128, 3949 VR128, loadv2i64, i128mem, itins, 3950 IsCommutable, 0>, VEX_4V; 3951 3952let Constraints = "$src1 = $dst" in 3953 defm NAME : PDI_binop_rm_int<opc, OpcodeStr, IntId128, VR128, memopv2i64, 3954 i128mem, itins, IsCommutable, 1>; 3955 3956let Predicates = [HasAVX2] in 3957 defm V#NAME#Y : PDI_binop_rm_int<opc, !strconcat("v", OpcodeStr), IntId256, 3958 VR256, loadv4i64, i256mem, itins, 3959 IsCommutable, 0>, VEX_4V, VEX_L; 3960} 3961 3962multiclass PDI_binop_rmi<bits<8> opc, bits<8> opc2, Format ImmForm, 3963 string OpcodeStr, SDNode OpNode, 3964 SDNode OpNode2, RegisterClass RC, 3965 ValueType DstVT, ValueType SrcVT, PatFrag bc_frag, 3966 PatFrag ld_frag, ShiftOpndItins itins, 3967 bit Is2Addr = 1> { 3968 // src2 is always 128-bit 3969 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 3970 (ins RC:$src1, VR128:$src2), 3971 !if(Is2Addr, 3972 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3973 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3974 [(set RC:$dst, (DstVT (OpNode RC:$src1, (SrcVT VR128:$src2))))], 3975 itins.rr>, Sched<[WriteVecShift]>; 3976 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 3977 (ins RC:$src1, i128mem:$src2), 3978 !if(Is2Addr, 3979 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3980 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3981 [(set RC:$dst, (DstVT (OpNode RC:$src1, 3982 (bc_frag (ld_frag addr:$src2)))))], itins.rm>, 3983 Sched<[WriteVecShiftLd, ReadAfterLd]>; 3984 def ri : PDIi8<opc2, ImmForm, (outs RC:$dst), 3985 (ins RC:$src1, u8imm:$src2), 3986 
!if(Is2Addr, 3987 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 3988 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 3989 [(set RC:$dst, (DstVT (OpNode2 RC:$src1, (i8 imm:$src2))))], itins.ri>, 3990 Sched<[WriteVecShift]>; 3991} 3992 3993/// PDI_binop_rm2 - Simple SSE2 binary operator with different src and dst types 3994multiclass PDI_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 3995 ValueType DstVT, ValueType SrcVT, RegisterClass RC, 3996 PatFrag memop_frag, X86MemOperand x86memop, 3997 OpndItins itins, 3998 bit IsCommutable = 0, bit Is2Addr = 1> { 3999 let isCommutable = IsCommutable in 4000 def rr : PDI<opc, MRMSrcReg, (outs RC:$dst), 4001 (ins RC:$src1, RC:$src2), 4002 !if(Is2Addr, 4003 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4004 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4005 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 4006 Sched<[itins.Sched]>; 4007 def rm : PDI<opc, MRMSrcMem, (outs RC:$dst), 4008 (ins RC:$src1, x86memop:$src2), 4009 !if(Is2Addr, 4010 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 4011 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 4012 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 4013 (bitconvert (memop_frag addr:$src2)))))]>, 4014 Sched<[itins.Sched.Folded, ReadAfterLd]>; 4015} 4016} // ExeDomain = SSEPackedInt 4017 4018defm PADDB : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8, 4019 SSE_INTALU_ITINS_P, 1>; 4020defm PADDW : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16, 4021 SSE_INTALU_ITINS_P, 1>; 4022defm PADDD : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32, 4023 SSE_INTALU_ITINS_P, 1>; 4024defm PADDQ : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64, 4025 SSE_INTALUQ_ITINS_P, 1>; 4026defm PMULLW : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16, 4027 SSE_INTMUL_ITINS_P, 1>; 4028defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16, 4029 SSE_INTMUL_ITINS_P, 1>; 4030defm PMULHW : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16, 4031 SSE_INTMUL_ITINS_P, 1>; 4032defm PSUBB : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8, 4033 SSE_INTALU_ITINS_P, 0>; 4034defm PSUBW : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16, 4035 SSE_INTALU_ITINS_P, 0>; 4036defm PSUBD : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32, 4037 SSE_INTALU_ITINS_P, 0>; 4038defm PSUBQ : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64, 4039 SSE_INTALUQ_ITINS_P, 0>; 4040defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8, 4041 SSE_INTALU_ITINS_P, 0>; 4042defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16, 4043 SSE_INTALU_ITINS_P, 0>; 4044defm PMINUB : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8, 4045 SSE_INTALU_ITINS_P, 1>; 4046defm PMINSW : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16, 4047 SSE_INTALU_ITINS_P, 1>; 4048defm PMAXUB : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8, 4049 SSE_INTALU_ITINS_P, 1>; 4050defm PMAXSW : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16, 4051 SSE_INTALU_ITINS_P, 1>; 4052 4053// Intrinsic forms 4054defm PSUBSB : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b, 4055 int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>; 4056defm PSUBSW : PDI_binop_all_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, 4057 int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>; 4058defm PADDSB : PDI_binop_all_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 4059 int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>; 4060defm PADDSW : PDI_binop_all_int<0xED, "paddsw" , int_x86_sse2_padds_w, 4061 

defm PADDB   : PDI_binop_all<0xFC, "paddb", add, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDW   : PDI_binop_all<0xFD, "paddw", add, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDD   : PDI_binop_all<0xFE, "paddd", add, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 1>;
defm PADDQ   : PDI_binop_all<0xD4, "paddq", add, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 1>;
defm PMULLW  : PDI_binop_all<0xD5, "pmullw", mul, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PMULHUW : PDI_binop_all<0xE4, "pmulhuw", mulhu, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PMULHW  : PDI_binop_all<0xE5, "pmulhw", mulhs, v8i16, v16i16,
                             SSE_INTMUL_ITINS_P, 1>;
defm PSUBB   : PDI_binop_all<0xF8, "psubb", sub, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBW   : PDI_binop_all<0xF9, "psubw", sub, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBD   : PDI_binop_all<0xFA, "psubd", sub, v4i32, v8i32,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBQ   : PDI_binop_all<0xFB, "psubq", sub, v2i64, v4i64,
                             SSE_INTALUQ_ITINS_P, 0>;
defm PSUBUSB : PDI_binop_all<0xD8, "psubusb", X86subus, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 0>;
defm PSUBUSW : PDI_binop_all<0xD9, "psubusw", X86subus, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 0>;
defm PMINUB  : PDI_binop_all<0xDA, "pminub", X86umin, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMINSW  : PDI_binop_all<0xEA, "pminsw", X86smin, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXUB  : PDI_binop_all<0xDE, "pmaxub", X86umax, v16i8, v32i8,
                             SSE_INTALU_ITINS_P, 1>;
defm PMAXSW  : PDI_binop_all<0xEE, "pmaxsw", X86smax, v8i16, v16i16,
                             SSE_INTALU_ITINS_P, 1>;

// Intrinsic forms
defm PSUBSB  : PDI_binop_all_int<0xE8, "psubsb", int_x86_sse2_psubs_b,
                                 int_x86_avx2_psubs_b, SSE_INTALU_ITINS_P, 0>;
defm PSUBSW  : PDI_binop_all_int<0xE9, "psubsw", int_x86_sse2_psubs_w,
                                 int_x86_avx2_psubs_w, SSE_INTALU_ITINS_P, 0>;
defm PADDSB  : PDI_binop_all_int<0xEC, "paddsb", int_x86_sse2_padds_b,
                                 int_x86_avx2_padds_b, SSE_INTALU_ITINS_P, 1>;
defm PADDSW  : PDI_binop_all_int<0xED, "paddsw", int_x86_sse2_padds_w,
                                 int_x86_avx2_padds_w, SSE_INTALU_ITINS_P, 1>;
defm PADDUSB : PDI_binop_all_int<0xDC, "paddusb", int_x86_sse2_paddus_b,
                                 int_x86_avx2_paddus_b, SSE_INTALU_ITINS_P, 1>;
defm PADDUSW : PDI_binop_all_int<0xDD, "paddusw", int_x86_sse2_paddus_w,
                                 int_x86_avx2_paddus_w, SSE_INTALU_ITINS_P, 1>;
defm PMADDWD : PDI_binop_all_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd,
                                 int_x86_avx2_pmadd_wd, SSE_PMADD, 1>;
defm PAVGB   : PDI_binop_all_int<0xE0, "pavgb", int_x86_sse2_pavg_b,
                                 int_x86_avx2_pavg_b, SSE_INTALU_ITINS_P, 1>;
defm PAVGW   : PDI_binop_all_int<0xE3, "pavgw", int_x86_sse2_pavg_w,
                                 int_x86_avx2_pavg_w, SSE_INTALU_ITINS_P, 1>;
defm PSADBW  : PDI_binop_all_int<0xF6, "psadbw", int_x86_sse2_psad_bw,
                                 int_x86_avx2_psad_bw, SSE_PMADD, 1>;

let Predicates = [HasAVX] in
defm VPMULUDQ : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v2i64, v4i32, VR128,
                              loadv2i64, i128mem, SSE_INTMUL_ITINS_P, 1, 0>,
                              VEX_4V;
let Predicates = [HasAVX2] in
defm VPMULUDQY : PDI_binop_rm2<0xF4, "vpmuludq", X86pmuludq, v4i64, v8i32,
                               VR256, loadv4i64, i256mem,
                               SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
let Constraints = "$src1 = $dst" in
defm PMULUDQ : PDI_binop_rm2<0xF4, "pmuludq", X86pmuludq, v2i64, v4i32, VR128,
                             memopv2i64, i128mem, SSE_INTMUL_ITINS_P, 1>;

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Logical Instructions
//===---------------------------------------------------------------------===//

let Predicates = [HasAVX, NoVLX] in {
defm VPSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
                            VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
                            VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
                            VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;

defm VPSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
                            VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
                            VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
                            VR128, v2i64, v2i64, bc_v2i64, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;

defm VPSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
                            VR128, v8i16, v8i16, bc_v8i16, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
defm VPSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                            VR128, v4i32, v4i32, bc_v4i32, loadv2i64,
                            SSE_INTSHIFT_ITINS_P, 0>, VEX_4V;
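
// Illustrative note (not part of the original file): each PDI_binop_rmi
// instantiation above yields three forms of one shift. Roughly, for VPSLLW:
//   vpsllw %xmm2, %xmm1, %xmm0      ; rr: count in an XMM register
//   vpsllw (%rax), %xmm1, %xmm0     ; rm: count loaded from memory
//   vpsllw $3, %xmm1, %xmm0         ; ri: immediate count (opcode 0x71 /6)
// The register/memory count is always a 128-bit operand, even for the
// 256-bit AVX2 variants defined below.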

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift] in {
  // 128-bit logical shifts.
  def VPSLLDQri : PDIi8<0x73, MRM7r,
                    (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
                    "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))]>,
                    VEX_4V;
  def VPSRLDQri : PDIi8<0x73, MRM3r,
                    (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
                    "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                    [(set VR128:$dst,
                      (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))]>,
                    VEX_4V;
  // PSRADQri doesn't exist in SSE[1-3].
}
} // Predicates = [HasAVX, NoVLX]

let Predicates = [HasAVX2, NoVLX] in {
defm VPSLLWY : PDI_binop_rmi<0xF1, 0x71, MRM6r, "vpsllw", X86vshl, X86vshli,
                             VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLDY : PDI_binop_rmi<0xF2, 0x72, MRM6r, "vpslld", X86vshl, X86vshli,
                             VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSLLQY : PDI_binop_rmi<0xF3, 0x73, MRM6r, "vpsllq", X86vshl, X86vshli,
                             VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

defm VPSRLWY : PDI_binop_rmi<0xD1, 0x71, MRM2r, "vpsrlw", X86vsrl, X86vsrli,
                             VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLDY : PDI_binop_rmi<0xD2, 0x72, MRM2r, "vpsrld", X86vsrl, X86vsrli,
                             VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRLQY : PDI_binop_rmi<0xD3, 0x73, MRM2r, "vpsrlq", X86vsrl, X86vsrli,
                             VR256, v4i64, v2i64, bc_v2i64, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

defm VPSRAWY : PDI_binop_rmi<0xE1, 0x71, MRM4r, "vpsraw", X86vsra, X86vsrai,
                             VR256, v16i16, v8i16, bc_v8i16, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;
defm VPSRADY : PDI_binop_rmi<0xE2, 0x72, MRM4r, "vpsrad", X86vsra, X86vsrai,
                             VR256, v8i32, v4i32, bc_v4i32, loadv2i64,
                             SSE_INTSHIFT_ITINS_P, 0>, VEX_4V, VEX_L;

let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in {
  // 256-bit logical shifts.
  def VPSLLDQYri : PDIi8<0x73, MRM7r,
                     (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
                     "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (v4i64 (X86vshldq VR256:$src1, (i8 imm:$src2))))]>,
                     VEX_4V, VEX_L;
  def VPSRLDQYri : PDIi8<0x73, MRM3r,
                     (outs VR256:$dst), (ins VR256:$src1, u8imm:$src2),
                     "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                     [(set VR256:$dst,
                       (v4i64 (X86vshrdq VR256:$src1, (i8 imm:$src2))))]>,
                     VEX_4V, VEX_L;
  // PSRADQYri doesn't exist in SSE[1-3].
4180} 4181} // Predicates = [HasAVX2] 4182 4183let Constraints = "$src1 = $dst" in { 4184defm PSLLW : PDI_binop_rmi<0xF1, 0x71, MRM6r, "psllw", X86vshl, X86vshli, 4185 VR128, v8i16, v8i16, bc_v8i16, memopv2i64, 4186 SSE_INTSHIFT_ITINS_P>; 4187defm PSLLD : PDI_binop_rmi<0xF2, 0x72, MRM6r, "pslld", X86vshl, X86vshli, 4188 VR128, v4i32, v4i32, bc_v4i32, memopv2i64, 4189 SSE_INTSHIFT_ITINS_P>; 4190defm PSLLQ : PDI_binop_rmi<0xF3, 0x73, MRM6r, "psllq", X86vshl, X86vshli, 4191 VR128, v2i64, v2i64, bc_v2i64, memopv2i64, 4192 SSE_INTSHIFT_ITINS_P>; 4193 4194defm PSRLW : PDI_binop_rmi<0xD1, 0x71, MRM2r, "psrlw", X86vsrl, X86vsrli, 4195 VR128, v8i16, v8i16, bc_v8i16, memopv2i64, 4196 SSE_INTSHIFT_ITINS_P>; 4197defm PSRLD : PDI_binop_rmi<0xD2, 0x72, MRM2r, "psrld", X86vsrl, X86vsrli, 4198 VR128, v4i32, v4i32, bc_v4i32, memopv2i64, 4199 SSE_INTSHIFT_ITINS_P>; 4200defm PSRLQ : PDI_binop_rmi<0xD3, 0x73, MRM2r, "psrlq", X86vsrl, X86vsrli, 4201 VR128, v2i64, v2i64, bc_v2i64, memopv2i64, 4202 SSE_INTSHIFT_ITINS_P>; 4203 4204defm PSRAW : PDI_binop_rmi<0xE1, 0x71, MRM4r, "psraw", X86vsra, X86vsrai, 4205 VR128, v8i16, v8i16, bc_v8i16, memopv2i64, 4206 SSE_INTSHIFT_ITINS_P>; 4207defm PSRAD : PDI_binop_rmi<0xE2, 0x72, MRM4r, "psrad", X86vsra, X86vsrai, 4208 VR128, v4i32, v4i32, bc_v4i32, memopv2i64, 4209 SSE_INTSHIFT_ITINS_P>; 4210 4211let ExeDomain = SSEPackedInt, SchedRW = [WriteVecShift], hasSideEffects = 0 in { 4212 // 128-bit logical shifts. 4213 def PSLLDQri : PDIi8<0x73, MRM7r, 4214 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 4215 "pslldq\t{$src2, $dst|$dst, $src2}", 4216 [(set VR128:$dst, 4217 (v2i64 (X86vshldq VR128:$src1, (i8 imm:$src2))))], 4218 IIC_SSE_INTSHDQ_P_RI>; 4219 def PSRLDQri : PDIi8<0x73, MRM3r, 4220 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 4221 "psrldq\t{$src2, $dst|$dst, $src2}", 4222 [(set VR128:$dst, 4223 (v2i64 (X86vshrdq VR128:$src1, (i8 imm:$src2))))], 4224 IIC_SSE_INTSHDQ_P_RI>; 4225 // PSRADQri doesn't exist in SSE[1-3]. 
4226} 4227} // Constraints = "$src1 = $dst" 4228 4229let Predicates = [HasAVX] in { 4230 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4231 (VPSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4232} 4233 4234let Predicates = [UseSSE2] in { 4235 def : Pat<(v2f64 (X86fsrl VR128:$src1, i32immSExt8:$src2)), 4236 (PSRLDQri VR128:$src1, (BYTE_imm imm:$src2))>; 4237} 4238 4239//===---------------------------------------------------------------------===// 4240// SSE2 - Packed Integer Comparison Instructions 4241//===---------------------------------------------------------------------===// 4242 4243defm PCMPEQB : PDI_binop_all<0x74, "pcmpeqb", X86pcmpeq, v16i8, v32i8, 4244 SSE_INTALU_ITINS_P, 1>; 4245defm PCMPEQW : PDI_binop_all<0x75, "pcmpeqw", X86pcmpeq, v8i16, v16i16, 4246 SSE_INTALU_ITINS_P, 1>; 4247defm PCMPEQD : PDI_binop_all<0x76, "pcmpeqd", X86pcmpeq, v4i32, v8i32, 4248 SSE_INTALU_ITINS_P, 1>; 4249defm PCMPGTB : PDI_binop_all<0x64, "pcmpgtb", X86pcmpgt, v16i8, v32i8, 4250 SSE_INTALU_ITINS_P, 0>; 4251defm PCMPGTW : PDI_binop_all<0x65, "pcmpgtw", X86pcmpgt, v8i16, v16i16, 4252 SSE_INTALU_ITINS_P, 0>; 4253defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, 4254 SSE_INTALU_ITINS_P, 0>; 4255 4256//===---------------------------------------------------------------------===// 4257// SSE2 - Packed Integer Shuffle Instructions 4258//===---------------------------------------------------------------------===// 4259 4260let ExeDomain = SSEPackedInt in { 4261multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256, 4262 SDNode OpNode> { 4263let Predicates = [HasAVX] in { 4264 def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst), 4265 (ins VR128:$src1, u8imm:$src2), 4266 !strconcat("v", OpcodeStr, 4267 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4268 [(set VR128:$dst, 4269 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4270 IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>; 4271 def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst), 4272 (ins i128mem:$src1, u8imm:$src2), 4273 !strconcat("v", OpcodeStr, 4274 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4275 [(set VR128:$dst, 4276 (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)), 4277 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, 4278 Sched<[WriteShuffleLd]>; 4279} 4280 4281let Predicates = [HasAVX2] in { 4282 def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst), 4283 (ins VR256:$src1, u8imm:$src2), 4284 !strconcat("v", OpcodeStr, 4285 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4286 [(set VR256:$dst, 4287 (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))], 4288 IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>; 4289 def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst), 4290 (ins i256mem:$src1, u8imm:$src2), 4291 !strconcat("v", OpcodeStr, 4292 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4293 [(set VR256:$dst, 4294 (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)), 4295 (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L, 4296 Sched<[WriteShuffleLd]>; 4297} 4298 4299let Predicates = [UseSSE2] in { 4300 def ri : Ii8<0x70, MRMSrcReg, 4301 (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2), 4302 !strconcat(OpcodeStr, 4303 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4304 [(set VR128:$dst, 4305 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))], 4306 IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>; 4307 def mi : Ii8<0x70, MRMSrcMem, 4308 (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2), 4309 !strconcat(OpcodeStr, 4310 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 4311 [(set VR128:$dst, 4312 (vt128 (OpNode 

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Shuffle Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
multiclass sse2_pshuffle<string OpcodeStr, ValueType vt128, ValueType vt256,
                         SDNode OpNode> {
let Predicates = [HasAVX] in {
  def V#NAME#ri : Ii8<0x70, MRMSrcReg, (outs VR128:$dst),
                      (ins VR128:$src1, u8imm:$src2),
                      !strconcat("v", OpcodeStr,
                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128:$dst,
                        (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
                      IIC_SSE_PSHUF_RI>, VEX, Sched<[WriteShuffle]>;
  def V#NAME#mi : Ii8<0x70, MRMSrcMem, (outs VR128:$dst),
                      (ins i128mem:$src1, u8imm:$src2),
                      !strconcat("v", OpcodeStr,
                                 "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                      [(set VR128:$dst,
                        (vt128 (OpNode (bitconvert (loadv2i64 addr:$src1)),
                         (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX,
                  Sched<[WriteShuffleLd]>;
}

let Predicates = [HasAVX2] in {
  def V#NAME#Yri : Ii8<0x70, MRMSrcReg, (outs VR256:$dst),
                       (ins VR256:$src1, u8imm:$src2),
                       !strconcat("v", OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (vt256 (OpNode VR256:$src1, (i8 imm:$src2))))],
                       IIC_SSE_PSHUF_RI>, VEX, VEX_L, Sched<[WriteShuffle]>;
  def V#NAME#Ymi : Ii8<0x70, MRMSrcMem, (outs VR256:$dst),
                       (ins i256mem:$src1, u8imm:$src2),
                       !strconcat("v", OpcodeStr,
                                  "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                       [(set VR256:$dst,
                         (vt256 (OpNode (bitconvert (loadv4i64 addr:$src1)),
                          (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>, VEX, VEX_L,
                   Sched<[WriteShuffleLd]>;
}

let Predicates = [UseSSE2] in {
  def ri : Ii8<0x70, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, u8imm:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128:$dst,
                 (vt128 (OpNode VR128:$src1, (i8 imm:$src2))))],
               IIC_SSE_PSHUF_RI>, Sched<[WriteShuffle]>;
  def mi : Ii8<0x70, MRMSrcMem,
               (outs VR128:$dst), (ins i128mem:$src1, u8imm:$src2),
               !strconcat(OpcodeStr,
                          "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
               [(set VR128:$dst,
                 (vt128 (OpNode (bitconvert (memopv2i64 addr:$src1)),
                  (i8 imm:$src2))))], IIC_SSE_PSHUF_MI>,
               Sched<[WriteShuffleLd, ReadAfterLd]>;
}
}
} // ExeDomain = SSEPackedInt

defm PSHUFD  : sse2_pshuffle<"pshufd", v4i32, v8i32, X86PShufd>, PD;
defm PSHUFHW : sse2_pshuffle<"pshufhw", v8i16, v16i16, X86PShufhw>, XS;
defm PSHUFLW : sse2_pshuffle<"pshuflw", v8i16, v16i16, X86PShuflw>, XD;

let Predicates = [HasAVX] in {
  def : Pat<(v4f32 (X86PShufd (loadv4f32 addr:$src1), (i8 imm:$imm))),
            (VPSHUFDmi addr:$src1, imm:$imm)>;
  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
            (VPSHUFDri VR128:$src1, imm:$imm)>;
}

let Predicates = [UseSSE2] in {
  def : Pat<(v4f32 (X86PShufd (memopv4f32 addr:$src1), (i8 imm:$imm))),
            (PSHUFDmi addr:$src1, imm:$imm)>;
  def : Pat<(v4f32 (X86PShufd VR128:$src1, (i8 imm:$imm))),
            (PSHUFDri VR128:$src1, imm:$imm)>;
}
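
// Illustrative note (not part of the original file): the pshufd immediate
// packs four 2-bit source indices, low element first. For example,
//   pshufd $0x1B, %xmm0, %xmm1    ; 0x1B = 0b00011011
// selects dwords 3,2,1,0 of %xmm0, i.e. it reverses the four elements.
// pshufhw/pshuflw shuffle only the high/low four words the same way.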

//===---------------------------------------------------------------------===//
// Packed Integer Pack Instructions (SSE & AVX)
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg,
               (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set VR128:$dst,
                     (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
               Sched<[WriteShuffle]>;
  def rm : PDI<opc, MRMSrcMem,
               (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
               !if(Is2Addr,
                   !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
               [(set VR128:$dst,
                     (OutVT (OpNode VR128:$src1,
                                    (bc_frag (ld_frag addr:$src2)))))]>,
               Sched<[WriteShuffleLd, ReadAfterLd]>;
}

multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
                       ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
  def Yrr : PDI<opc, MRMSrcReg,
                (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst,
                      (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
                Sched<[WriteShuffle]>;
  def Yrm : PDI<opc, MRMSrcMem,
                (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
                !strconcat(OpcodeStr,
                           "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                [(set VR256:$dst,
                      (OutVT (OpNode VR256:$src1,
                                     (bc_frag (loadv4i64 addr:$src2)))))]>,
                Sched<[WriteShuffleLd, ReadAfterLd]>;
}

multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT,
                     ValueType ArgVT, SDNode OpNode, PatFrag bc_frag,
                     PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : SS48I<opc, MRMSrcReg,
                 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                       (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>,
                 Sched<[WriteShuffle]>;
  def rm : SS48I<opc, MRMSrcMem,
                 (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
                 !if(Is2Addr,
                     !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
                     !strconcat(OpcodeStr,
                                "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
                 [(set VR128:$dst,
                       (OutVT (OpNode VR128:$src1,
                                      (bc_frag (ld_frag addr:$src2)))))]>,
                 Sched<[WriteShuffleLd, ReadAfterLd]>;
}

multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
                       ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
  def Yrr : SS48I<opc, MRMSrcReg,
                  (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst,
                        (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
                  Sched<[WriteShuffle]>;
  def Yrm : SS48I<opc, MRMSrcMem,
                  (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
                  !strconcat(OpcodeStr,
                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                  [(set VR256:$dst,
                        (OutVT (OpNode VR256:$src1,
                                       (bc_frag (loadv4i64 addr:$src2)))))]>,
                  Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
                             bc_v8i16, loadv2i64, 0>, VEX_4V;
  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
                             bc_v4i32, loadv2i64, 0>, VEX_4V;

  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
                             bc_v8i16, loadv2i64, 0>, VEX_4V;
  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
                             bc_v4i32, loadv2i64, 0>, VEX_4V;
}

let Predicates = [HasAVX2] in {
  defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
                               bc_v16i16>, VEX_4V, VEX_L;
  defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
                               bc_v8i32>, VEX_4V, VEX_L;

  defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
                               bc_v16i16>, VEX_4V, VEX_L;
  defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
                               bc_v8i32>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
                            bc_v8i16, memopv2i64>;
  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
                            bc_v4i32, memopv2i64>;

  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
                            bc_v8i16, memopv2i64>;

  let Predicates = [HasSSE41] in
  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
                            bc_v4i32, memopv2i64>;
}
} // ExeDomain = SSEPackedInt
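
// Illustrative note (not part of the original file): the pack instructions
// narrow two source vectors into one with saturation. E.g., packsswb turns
// the word 300 into the byte 127 (signed saturation), while packuswb turns
// -1 into 0 and 300 into 255 (unsigned saturation of signed words).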

//===---------------------------------------------------------------------===//
// SSE2 - Packed Integer Unpack Instructions
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt in {
multiclass sse2_unpack<bits<8> opc, string OpcodeStr, ValueType vt,
                       SDNode OpNode, PatFrag bc_frag, PatFrag ld_frag,
                       bit Is2Addr = 1> {
  def rr : PDI<opc, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
      IIC_SSE_UNPCK>, Sched<[WriteShuffle]>;
  def rm : PDI<opc, MRMSrcMem,
      (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2),
      !if(Is2Addr,
          !strconcat(OpcodeStr,"\t{$src2, $dst|$dst, $src2}"),
          !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
      [(set VR128:$dst, (OpNode VR128:$src1,
                                (bc_frag (ld_frag addr:$src2))))],
      IIC_SSE_UNPCK>,
      Sched<[WriteShuffleLd, ReadAfterLd]>;
}

multiclass sse2_unpack_y<bits<8> opc, string OpcodeStr, ValueType vt,
                         SDNode OpNode, PatFrag bc_frag> {
  def Yrr : PDI<opc, MRMSrcReg,
      (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
      [(set VR256:$dst, (vt (OpNode VR256:$src1, VR256:$src2)))]>,
      Sched<[WriteShuffle]>;
  def Yrm : PDI<opc, MRMSrcMem,
      (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
      !strconcat(OpcodeStr,"\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
      [(set VR256:$dst, (OpNode VR256:$src1,
                                (bc_frag (loadv4i64 addr:$src2))))]>,
      Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  defm VPUNPCKLBW  : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl,
                                 bc_v16i8, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKLWD  : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl,
                                 bc_v8i16, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKLDQ  : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl,
                                 bc_v4i32, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl,
                                 bc_v2i64, loadv2i64, 0>, VEX_4V;

  defm VPUNPCKHBW  : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh,
                                 bc_v16i8, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKHWD  : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh,
                                 bc_v8i16, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKHDQ  : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh,
                                 bc_v4i32, loadv2i64, 0>, VEX_4V;
  defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh,
                                 bc_v2i64, loadv2i64, 0>, VEX_4V;
}

let Predicates = [HasAVX2] in {
  defm VPUNPCKLBW  : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl,
                                   bc_v32i8>, VEX_4V, VEX_L;
  defm VPUNPCKLWD  : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl,
                                   bc_v16i16>, VEX_4V, VEX_L;
  defm VPUNPCKLDQ  : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl,
                                   bc_v8i32>, VEX_4V, VEX_L;
  defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl,
                                   bc_v4i64>, VEX_4V, VEX_L;

  defm VPUNPCKHBW  : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh,
                                   bc_v32i8>, VEX_4V, VEX_L;
  defm VPUNPCKHWD  : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh,
                                   bc_v16i16>, VEX_4V, VEX_L;
  defm VPUNPCKHDQ  : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh,
                                   bc_v8i32>, VEX_4V, VEX_L;
  defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh,
                                   bc_v4i64>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PUNPCKLBW  : sse2_unpack<0x60, "punpcklbw", v16i8, X86Unpckl,
                                bc_v16i8, memopv2i64>;
  defm PUNPCKLWD  : sse2_unpack<0x61, "punpcklwd", v8i16, X86Unpckl,
                                bc_v8i16, memopv2i64>;
  defm PUNPCKLDQ  : sse2_unpack<0x62, "punpckldq", v4i32, X86Unpckl,
                                bc_v4i32, memopv2i64>;
  defm PUNPCKLQDQ : sse2_unpack<0x6C, "punpcklqdq", v2i64, X86Unpckl,
                                bc_v2i64, memopv2i64>;

  defm PUNPCKHBW  : sse2_unpack<0x68, "punpckhbw", v16i8, X86Unpckh,
                                bc_v16i8, memopv2i64>;
  defm PUNPCKHWD  : sse2_unpack<0x69, "punpckhwd", v8i16, X86Unpckh,
                                bc_v8i16, memopv2i64>;
  defm PUNPCKHDQ  : sse2_unpack<0x6A, "punpckhdq", v4i32, X86Unpckh,
                                bc_v4i32, memopv2i64>;
  defm PUNPCKHQDQ : sse2_unpack<0x6D, "punpckhqdq", v2i64, X86Unpckh,
                                bc_v2i64, memopv2i64>;
}
} // ExeDomain = SSEPackedInt
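
// Illustrative note (not part of the original file): the unpack forms
// interleave elements from the low (punpckl*) or high (punpckh*) halves of
// the two sources. E.g., with bytes A = {a0..a15} and B = {b0..b15},
//   punpcklbw B, A    ; A = {a0,b0,a1,b1,...,a7,b7}
// A common idiom zero-extends bytes to words by unpacking with a zeroed
// register.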
4569//===---------------------------------------------------------------------===// 4570// SSE2 - Packed Integer Extract and Insert 4571//===---------------------------------------------------------------------===// 4572 4573let ExeDomain = SSEPackedInt in { 4574multiclass sse2_pinsrw<bit Is2Addr = 1> { 4575 def rri : Ii8<0xC4, MRMSrcReg, 4576 (outs VR128:$dst), (ins VR128:$src1, 4577 GR32orGR64:$src2, u8imm:$src3), 4578 !if(Is2Addr, 4579 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4580 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4581 [(set VR128:$dst, 4582 (X86pinsrw VR128:$src1, GR32orGR64:$src2, imm:$src3))], 4583 IIC_SSE_PINSRW>, Sched<[WriteShuffle]>; 4584 def rmi : Ii8<0xC4, MRMSrcMem, 4585 (outs VR128:$dst), (ins VR128:$src1, 4586 i16mem:$src2, u8imm:$src3), 4587 !if(Is2Addr, 4588 "pinsrw\t{$src3, $src2, $dst|$dst, $src2, $src3}", 4589 "vpinsrw\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 4590 [(set VR128:$dst, 4591 (X86pinsrw VR128:$src1, (extloadi16 addr:$src2), 4592 imm:$src3))], IIC_SSE_PINSRW>, 4593 Sched<[WriteShuffleLd, ReadAfterLd]>; 4594} 4595 4596// Extract 4597let Predicates = [HasAVX] in 4598def VPEXTRWri : Ii8<0xC5, MRMSrcReg, 4599 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 4600 "vpextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4601 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4602 imm:$src2))]>, PD, VEX, 4603 Sched<[WriteShuffle]>; 4604def PEXTRWri : PDIi8<0xC5, MRMSrcReg, 4605 (outs GR32orGR64:$dst), (ins VR128:$src1, u8imm:$src2), 4606 "pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}", 4607 [(set GR32orGR64:$dst, (X86pextrw (v8i16 VR128:$src1), 4608 imm:$src2))], IIC_SSE_PEXTRW>, 4609 Sched<[WriteShuffleLd, ReadAfterLd]>; 4610 4611// Insert 4612let Predicates = [HasAVX] in 4613defm VPINSRW : sse2_pinsrw<0>, PD, VEX_4V; 4614 4615let Predicates = [UseSSE2], Constraints = "$src1 = $dst" in 4616defm PINSRW : sse2_pinsrw, PD; 4617 4618} // ExeDomain = SSEPackedInt 4619 4620//===---------------------------------------------------------------------===// 4621// SSE2 - Packed Mask Creation 4622//===---------------------------------------------------------------------===// 4623 4624let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { 4625 4626def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4627 (ins VR128:$src), 4628 "pmovmskb\t{$src, $dst|$dst, $src}", 4629 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4630 IIC_SSE_MOVMSK>, VEX; 4631 4632let Predicates = [HasAVX2] in { 4633def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), 4634 (ins VR256:$src), 4635 "pmovmskb\t{$src, $dst|$dst, $src}", 4636 [(set GR32orGR64:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, 4637 VEX, VEX_L; 4638} 4639 4640def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32orGR64:$dst), (ins VR128:$src), 4641 "pmovmskb\t{$src, $dst|$dst, $src}", 4642 [(set GR32orGR64:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))], 4643 IIC_SSE_MOVMSK>; 4644 4645} // ExeDomain = SSEPackedInt 4646 4647//===---------------------------------------------------------------------===// 4648// SSE2 - Conditional Store 4649//===---------------------------------------------------------------------===// 4650 4651let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { 4652 4653let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in 4654def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs), 4655 (ins VR128:$src, VR128:$mask), 4656 "maskmovdqu\t{$mask, $src|$src, $mask}", 4657 [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, 

//===---------------------------------------------------------------------===//
// SSE2 - Conditional Store
//===---------------------------------------------------------------------===//

let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in {

let Uses = [EDI], Predicates = [HasAVX,Not64BitMode] in
def VMASKMOVDQU : VPDI<0xF7, MRMSrcReg, (outs),
           (ins VR128:$src, VR128:$mask),
           "maskmovdqu\t{$mask, $src|$src, $mask}",
           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
           IIC_SSE_MASKMOV>, VEX;
let Uses = [RDI], Predicates = [HasAVX,In64BitMode] in
def VMASKMOVDQU64 : VPDI<0xF7, MRMSrcReg, (outs),
           (ins VR128:$src, VR128:$mask),
           "maskmovdqu\t{$mask, $src|$src, $mask}",
           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
           IIC_SSE_MASKMOV>, VEX;

let Uses = [EDI], Predicates = [UseSSE2,Not64BitMode] in
def MASKMOVDQU : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
           "maskmovdqu\t{$mask, $src|$src, $mask}",
           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, EDI)],
           IIC_SSE_MASKMOV>;
let Uses = [RDI], Predicates = [UseSSE2,In64BitMode] in
def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
           "maskmovdqu\t{$mask, $src|$src, $mask}",
           [(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)],
           IIC_SSE_MASKMOV>;

} // ExeDomain = SSEPackedInt
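
// Illustrative note (not part of the original file): maskmovdqu stores only
// the bytes of $src whose mask byte has its top bit set, at the address held
// implicitly in EDI/RDI, e.g.:
//   maskmovdqu %xmm1, %xmm0   ; store %xmm0 bytes selected by %xmm1 to (%rdi)
// which is why the definitions above list EDI or RDI in Uses rather than
// taking an address operand.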
"mov{d|q}\t{$src, $dst|$dst, $src}", 4734 [(set FR64:$dst, (bitconvert GR64:$src))], 4735 IIC_SSE_MOVDQ>, Sched<[WriteMove]>; 4736 4737//===---------------------------------------------------------------------===// 4738// Move Int Doubleword to Single Scalar 4739// 4740let isCodeGenOnly = 1 in { 4741 def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4742 "movd\t{$src, $dst|$dst, $src}", 4743 [(set FR32:$dst, (bitconvert GR32:$src))], 4744 IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>; 4745 4746 def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4747 "movd\t{$src, $dst|$dst, $src}", 4748 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4749 IIC_SSE_MOVDQ>, 4750 VEX, Sched<[WriteLoad]>; 4751 def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), 4752 "movd\t{$src, $dst|$dst, $src}", 4753 [(set FR32:$dst, (bitconvert GR32:$src))], 4754 IIC_SSE_MOVDQ>, Sched<[WriteMove]>; 4755 4756 def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), 4757 "movd\t{$src, $dst|$dst, $src}", 4758 [(set FR32:$dst, (bitconvert (loadi32 addr:$src)))], 4759 IIC_SSE_MOVDQ>, Sched<[WriteLoad]>; 4760} 4761 4762//===---------------------------------------------------------------------===// 4763// Move Packed Doubleword Int to Packed Double Int 4764// 4765def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4766 "movd\t{$src, $dst|$dst, $src}", 4767 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4768 (iPTR 0)))], IIC_SSE_MOVD_ToGP>, VEX, 4769 Sched<[WriteMove]>; 4770def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), 4771 (ins i32mem:$dst, VR128:$src), 4772 "movd\t{$src, $dst|$dst, $src}", 4773 [(store (i32 (vector_extract (v4i32 VR128:$src), 4774 (iPTR 0))), addr:$dst)], IIC_SSE_MOVDQ>, 4775 VEX, Sched<[WriteStore]>; 4776def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), 4777 "movd\t{$src, $dst|$dst, $src}", 4778 [(set GR32:$dst, (vector_extract (v4i32 VR128:$src), 4779 (iPTR 0)))], IIC_SSE_MOVD_ToGP>, 4780 Sched<[WriteMove]>; 4781def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), 4782 "movd\t{$src, $dst|$dst, $src}", 4783 [(store (i32 (vector_extract (v4i32 VR128:$src), 4784 (iPTR 0))), addr:$dst)], 4785 IIC_SSE_MOVDQ>, Sched<[WriteStore]>; 4786 4787def : Pat<(v8i32 (X86Vinsert (v8i32 immAllZerosV), GR32:$src2, (iPTR 0))), 4788 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>; 4789 4790def : Pat<(v4i64 (X86Vinsert (bc_v4i64 (v8i32 immAllZerosV)), GR64:$src2, (iPTR 0))), 4791 (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>; 4792 4793def : Pat<(v8i32 (X86Vinsert undef, GR32:$src2, (iPTR 0))), 4794 (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src2), sub_xmm)>; 4795 4796def : Pat<(v4i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), 4797 (SUBREG_TO_REG (i32 0), (VMOV64toPQIrr GR64:$src2), sub_xmm)>; 4798 4799//===---------------------------------------------------------------------===// 4800// Move Packed Doubleword Int first element to Doubleword Int 4801// 4802let SchedRW = [WriteMove] in { 4803def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4804 "movq\t{$src, $dst|$dst, $src}", 4805 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4806 (iPTR 0)))], 4807 IIC_SSE_MOVD_ToGP>, 4808 VEX; 4809 4810def MOVPQIto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), 4811 "mov{d|q}\t{$src, $dst|$dst, $src}", 4812 [(set GR64:$dst, (vector_extract (v2i64 VR128:$src), 4813 (iPTR 0)))], 4814 
                        IIC_SSE_MOVD_ToGP>;
} // SchedRW

let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def VMOVPQIto64rm : VRS2I<0x7E, MRMDestMem, (outs i64mem:$dst),
                          (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}",
                          [], IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
def MOVPQIto64rm : RS2I<0x7E, MRMDestMem, (outs i64mem:$dst), (ins VR128:$src),
                        "mov{d|q}\t{$src, $dst|$dst, $src}",
                        [], IIC_SSE_MOVDQ>, Sched<[WriteStore]>;

//===---------------------------------------------------------------------===//
// Bitcast FR64 <-> GR64
//
let isCodeGenOnly = 1 in {
  let Predicates = [UseAVX] in
  def VMOV64toSDrm : VS2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                          "movq\t{$src, $dst|$dst, $src}",
                          [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))]>,
                          VEX, Sched<[WriteLoad]>;
  def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(set GR64:$dst, (bitconvert FR64:$src))],
                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteMove]>;
  def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                           "movq\t{$src, $dst|$dst, $src}",
                           [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                           IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;

  def MOV64toSDrm : S2SI<0x7E, MRMSrcMem, (outs FR64:$dst), (ins i64mem:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set FR64:$dst, (bitconvert (loadi64 addr:$src)))],
                         IIC_SSE_MOVDQ>, Sched<[WriteLoad]>;
  def MOVSDto64rr : RS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src),
                         "mov{d|q}\t{$src, $dst|$dst, $src}",
                         [(set GR64:$dst, (bitconvert FR64:$src))],
                         IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
  def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(store (i64 (bitconvert FR64:$src)), addr:$dst)],
                         IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Move Scalar Single to Double Int
//
let isCodeGenOnly = 1 in {
  def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(set GR32:$dst, (bitconvert FR32:$src))],
                         IIC_SSE_MOVD_ToGP>, VEX, Sched<[WriteMove]>;
  def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                         "movd\t{$src, $dst|$dst, $src}",
                         [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                         IIC_SSE_MOVDQ>, VEX, Sched<[WriteStore]>;
  def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(set GR32:$dst, (bitconvert FR32:$src))],
                       IIC_SSE_MOVD_ToGP>, Sched<[WriteMove]>;
  def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
                       "movd\t{$src, $dst|$dst, $src}",
                       [(store (i32 (bitconvert FR32:$src)), addr:$dst)],
                       IIC_SSE_MOVDQ>, Sched<[WriteStore]>;
}

//===---------------------------------------------------------------------===//
// Patterns and instructions to describe movd/movq to XMM register zero-extends
//
let isCodeGenOnly = 1, SchedRW = [WriteMove] in {
let AddedComplexity = 15 in {
def VMOVZQI2PQIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "movq\t{$src, $dst|$dst, $src}", // X86-64 only
                       [(set VR128:$dst, (v2i64 (X86vzmovl
                                      (v2i64 (scalar_to_vector GR64:$src)))))],
                                      IIC_SSE_MOVDQ>,
                                      VEX, VEX_W;
def MOVZQI2PQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src),
                       "mov{d|q}\t{$src, $dst|$dst, $src}", // X86-64 only
                       [(set VR128:$dst, (v2i64 (X86vzmovl
                                      (v2i64 (scalar_to_vector GR64:$src)))))],
                                      IIC_SSE_MOVDQ>;
}
} // isCodeGenOnly, SchedRW

let Predicates = [UseAVX] in {
  let AddedComplexity = 15 in
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (VMOVDI2PDIrr GR32:$src)>;

  // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
  // These instructions also write zeros in the high part of a 256-bit register.
  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (VMOVDI2PDIrm addr:$src)>;
    def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                       (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
  }
  // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                               (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrr GR32:$src), sub_xmm)>;
  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                               (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
            (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrr GR64:$src), sub_xmm)>;
}

let Predicates = [UseSSE2] in {
  let AddedComplexity = 15 in
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
              (MOVDI2PDIrr GR32:$src)>;

  let AddedComplexity = 20 in {
    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv4f32 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
              (MOVDI2PDIrm addr:$src)>;
  }
}
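
// Illustrative note (not part of the original file): the patterns above let
// the compiler implement a scalar-to-vector zero-extend with a plain movd,
// since the instruction already clears the upper lanes. E.g.,
//   movd %eax, %xmm0     ; %xmm0 = {eax, 0, 0, 0}
// needs no extra pxor/insert sequence.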
4949def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4950 (VMOV64toPQIrr VR128:$dst, GR64:$src), 0>; 4951def : InstAlias<"vmovd\t{$src, $dst|$dst, $src}", 4952 (VMOVPQIto64rr GR64:$dst, VR128:$src), 0>; 4953 4954//===---------------------------------------------------------------------===// 4955// SSE2 - Move Quadword 4956//===---------------------------------------------------------------------===// 4957 4958//===---------------------------------------------------------------------===// 4959// Move Quadword Int to Packed Quadword Int 4960// 4961 4962let ExeDomain = SSEPackedInt, SchedRW = [WriteLoad] in { 4963def VMOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4964 "vmovq\t{$src, $dst|$dst, $src}", 4965 [(set VR128:$dst, 4966 (v2i64 (scalar_to_vector (loadi64 addr:$src))))]>, XS, 4967 VEX, Requires<[UseAVX]>; 4968def MOVQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 4969 "movq\t{$src, $dst|$dst, $src}", 4970 [(set VR128:$dst, 4971 (v2i64 (scalar_to_vector (loadi64 addr:$src))))], 4972 IIC_SSE_MOVDQ>, XS, 4973 Requires<[UseSSE2]>; // SSE2 instruction with XS Prefix 4974} // ExeDomain, SchedRW 4975 4976//===---------------------------------------------------------------------===// 4977// Move Packed Quadword Int to Quadword Int 4978// 4979let ExeDomain = SSEPackedInt, SchedRW = [WriteStore] in { 4980def VMOVPQI2QImr : VS2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4981 "movq\t{$src, $dst|$dst, $src}", 4982 [(store (i64 (vector_extract (v2i64 VR128:$src), 4983 (iPTR 0))), addr:$dst)], 4984 IIC_SSE_MOVDQ>, VEX; 4985def MOVPQI2QImr : S2I<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src), 4986 "movq\t{$src, $dst|$dst, $src}", 4987 [(store (i64 (vector_extract (v2i64 VR128:$src), 4988 (iPTR 0))), addr:$dst)], 4989 IIC_SSE_MOVDQ>; 4990} // ExeDomain, SchedRW 4991 4992// For disassembler only 4993let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, 4994 SchedRW = [WriteVecLogic] in { 4995def VMOVPQI2QIrr : VS2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 4996 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>, VEX; 4997def MOVPQI2QIrr : S2I<0xD6, MRMDestReg, (outs VR128:$dst), (ins VR128:$src), 4998 "movq\t{$src, $dst|$dst, $src}", [], IIC_SSE_MOVQ_RR>; 4999} 5000 5001//===---------------------------------------------------------------------===// 5002// Store / copy lower 64-bits of a XMM register. 
5003// 5004let Predicates = [UseAVX] in 5005def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), 5006 (VMOVPQI2QImr addr:$dst, VR128:$src)>; 5007let Predicates = [UseSSE2] in 5008def : Pat<(int_x86_sse2_storel_dq addr:$dst, VR128:$src), 5009 (MOVPQI2QImr addr:$dst, VR128:$src)>; 5010 5011let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, AddedComplexity = 20 in { 5012def VMOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 5013 "vmovq\t{$src, $dst|$dst, $src}", 5014 [(set VR128:$dst, 5015 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 5016 (loadi64 addr:$src))))))], 5017 IIC_SSE_MOVDQ>, 5018 XS, VEX, Requires<[UseAVX]>, Sched<[WriteLoad]>; 5019 5020def MOVZQI2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), 5021 "movq\t{$src, $dst|$dst, $src}", 5022 [(set VR128:$dst, 5023 (v2i64 (X86vzmovl (v2i64 (scalar_to_vector 5024 (loadi64 addr:$src))))))], 5025 IIC_SSE_MOVDQ>, 5026 XS, Requires<[UseSSE2]>, Sched<[WriteLoad]>; 5027} // ExeDomain, isCodeGenOnly, AddedComplexity 5028 5029let Predicates = [UseAVX], AddedComplexity = 20 in { 5030 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 5031 (VMOVZQI2PQIrm addr:$src)>; 5032 def : Pat<(v2i64 (X86vzload addr:$src)), 5033 (VMOVZQI2PQIrm addr:$src)>; 5034 def : Pat<(v4i64 (X86vzmovl (insert_subvector undef, 5035 (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))), 5036 (SUBREG_TO_REG (i64 0), (VMOVZQI2PQIrm addr:$src), sub_xmm)>; 5037} 5038 5039let Predicates = [UseSSE2], AddedComplexity = 20 in { 5040 def : Pat<(v2i64 (X86vzmovl (bc_v2i64 (loadv4f32 addr:$src)))), 5041 (MOVZQI2PQIrm addr:$src)>; 5042 def : Pat<(v2i64 (X86vzload addr:$src)), (MOVZQI2PQIrm addr:$src)>; 5043} 5044 5045let Predicates = [HasAVX] in { 5046def : Pat<(v4i64 (alignedX86vzload addr:$src)), 5047 (SUBREG_TO_REG (i32 0), (VMOVAPSrm addr:$src), sub_xmm)>; 5048def : Pat<(v4i64 (X86vzload addr:$src)), 5049 (SUBREG_TO_REG (i32 0), (VMOVUPSrm addr:$src), sub_xmm)>; 5050} 5051 5052//===---------------------------------------------------------------------===// 5053// Moving from XMM to XMM and clear upper 64 bits. Note, there is a bug in 5054// IA32 document. movq xmm1, xmm2 does clear the high bits. 
5055// 5056let ExeDomain = SSEPackedInt, SchedRW = [WriteVecLogic] in { 5057let AddedComplexity = 15 in 5058def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5059 "vmovq\t{$src, $dst|$dst, $src}", 5060 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 5061 IIC_SSE_MOVQ_RR>, 5062 XS, VEX, Requires<[UseAVX]>; 5063let AddedComplexity = 15 in 5064def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), 5065 "movq\t{$src, $dst|$dst, $src}", 5066 [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))], 5067 IIC_SSE_MOVQ_RR>, 5068 XS, Requires<[UseSSE2]>; 5069} // ExeDomain, SchedRW 5070 5071let ExeDomain = SSEPackedInt, isCodeGenOnly = 1, SchedRW = [WriteVecLogicLd] in { 5072let AddedComplexity = 20 in 5073def VMOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5074 "vmovq\t{$src, $dst|$dst, $src}", 5075 [(set VR128:$dst, (v2i64 (X86vzmovl 5076 (loadv2i64 addr:$src))))], 5077 IIC_SSE_MOVDQ>, 5078 XS, VEX, Requires<[UseAVX]>; 5079let AddedComplexity = 20 in { 5080def MOVZPQILo2PQIrm : I<0x7E, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 5081 "movq\t{$src, $dst|$dst, $src}", 5082 [(set VR128:$dst, (v2i64 (X86vzmovl 5083 (loadv2i64 addr:$src))))], 5084 IIC_SSE_MOVDQ>, 5085 XS, Requires<[UseSSE2]>; 5086} 5087} // ExeDomain, isCodeGenOnly, SchedRW 5088 5089let AddedComplexity = 20 in { 5090 let Predicates = [UseAVX] in { 5091 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 5092 (VMOVZPQILo2PQIrr VR128:$src)>; 5093 } 5094 let Predicates = [UseSSE2] in { 5095 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 5096 (MOVZPQILo2PQIrr VR128:$src)>; 5097 } 5098} 5099 5100//===---------------------------------------------------------------------===// 5101// SSE3 - Replicate Single FP - MOVSHDUP and MOVSLDUP 5102//===---------------------------------------------------------------------===// 5103multiclass sse3_replicate_sfp<bits<8> op, SDNode OpNode, string OpcodeStr, 5104 ValueType vt, RegisterClass RC, PatFrag mem_frag, 5105 X86MemOperand x86memop> { 5106def rr : S3SI<op, MRMSrcReg, (outs RC:$dst), (ins RC:$src), 5107 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5108 [(set RC:$dst, (vt (OpNode RC:$src)))], 5109 IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>; 5110def rm : S3SI<op, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 5111 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5112 [(set RC:$dst, (OpNode (mem_frag addr:$src)))], 5113 IIC_SSE_MOV_LH>, Sched<[WriteLoad]>; 5114} 5115 5116let Predicates = [HasAVX] in { 5117 defm VMOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 5118 v4f32, VR128, loadv4f32, f128mem>, VEX; 5119 defm VMOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 5120 v4f32, VR128, loadv4f32, f128mem>, VEX; 5121 defm VMOVSHDUPY : sse3_replicate_sfp<0x16, X86Movshdup, "vmovshdup", 5122 v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; 5123 defm VMOVSLDUPY : sse3_replicate_sfp<0x12, X86Movsldup, "vmovsldup", 5124 v8f32, VR256, loadv8f32, f256mem>, VEX, VEX_L; 5125} 5126defm MOVSHDUP : sse3_replicate_sfp<0x16, X86Movshdup, "movshdup", v4f32, VR128, 5127 memopv4f32, f128mem>; 5128defm MOVSLDUP : sse3_replicate_sfp<0x12, X86Movsldup, "movsldup", v4f32, VR128, 5129 memopv4f32, f128mem>; 5130 5131let Predicates = [HasAVX] in { 5132 def : Pat<(v4i32 (X86Movshdup VR128:$src)), 5133 (VMOVSHDUPrr VR128:$src)>; 5134 def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (loadv2i64 addr:$src)))), 5135 (VMOVSHDUPrm addr:$src)>; 5136 def : Pat<(v4i32 (X86Movsldup VR128:$src)), 5137 (VMOVSLDUPrr 
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (loadv2i64 addr:$src)))),
            (VMOVSLDUPrm addr:$src)>;
  def : Pat<(v8i32 (X86Movshdup VR256:$src)),
            (VMOVSHDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movshdup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSHDUPYrm addr:$src)>;
  def : Pat<(v8i32 (X86Movsldup VR256:$src)),
            (VMOVSLDUPYrr VR256:$src)>;
  def : Pat<(v8i32 (X86Movsldup (bc_v8i32 (loadv4i64 addr:$src)))),
            (VMOVSLDUPYrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(v4i32 (X86Movshdup VR128:$src)),
            (MOVSHDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movshdup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSHDUPrm addr:$src)>;
  def : Pat<(v4i32 (X86Movsldup VR128:$src)),
            (MOVSLDUPrr VR128:$src)>;
  def : Pat<(v4i32 (X86Movsldup (bc_v4i32 (memopv2i64 addr:$src)))),
            (MOVSLDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Replicate Double FP - MOVDDUP
//===---------------------------------------------------------------------===//

multiclass sse3_replicate_dfp<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (v2f64 (X86Movddup VR128:$src)))],
                    IIC_SSE_MOV_LH>, Sched<[WriteFShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (v2f64 (X86Movddup
                              (scalar_to_vector (loadf64 addr:$src)))))],
                              IIC_SSE_MOV_LH>, Sched<[WriteLoad]>;
}

// FIXME: Merge with the above class when there are patterns for the ymm
// version.
multiclass sse3_replicate_dfp_y<string OpcodeStr> {
def rr : S3DI<0x12, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst, (v4f64 (X86Movddup VR256:$src)))]>,
                    Sched<[WriteFShuffle]>;
def rm : S3DI<0x12, MRMSrcMem, (outs VR256:$dst), (ins f256mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR256:$dst,
                      (v4f64 (X86Movddup
                              (scalar_to_vector (loadf64 addr:$src)))))]>,
                    Sched<[WriteLoad]>;
}

let Predicates = [HasAVX] in {
  defm VMOVDDUP  : sse3_replicate_dfp<"vmovddup">, VEX;
  defm VMOVDDUPY : sse3_replicate_dfp_y<"vmovddup">, VEX, VEX_L;
}

defm MOVDDUP : sse3_replicate_dfp<"movddup">;

let Predicates = [HasAVX] in {
  def : Pat<(X86Movddup (loadv2f64 addr:$src)),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv4f32 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64 (loadv2i64 addr:$src))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;
  def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (VMOVDDUPrm addr:$src)>, Requires<[HasAVX]>;

  // 256-bit version
  def : Pat<(X86Movddup (loadv4f64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (loadv4i64 addr:$src)),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 (scalar_to_vector (loadi64 addr:$src)))),
            (VMOVDDUPYrm addr:$src)>;
  def : Pat<(X86Movddup (v4i64 VR256:$src)),
            (VMOVDDUPYrr VR256:$src)>;
}

let Predicates = [UseAVX, OptForSize] in {
  def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
            (VMOVDDUPrm addr:$src)>;
}

let Predicates = [UseSSE3] in {
  def : Pat<(X86Movddup (memopv2f64 addr:$src)),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv4f32 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64 (memopv2i64 addr:$src))),
            (MOVDDUPrm addr:$src)>;
  def : Pat<(X86Movddup (bc_v2f64
                             (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
            (MOVDDUPrm addr:$src)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Move Unaligned Integer
//===---------------------------------------------------------------------===//

let SchedRW = [WriteLoad] in {
let Predicates = [HasAVX] in {
  def VLDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                      "vlddqu\t{$src, $dst|$dst, $src}",
                      [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))]>, VEX;
  def VLDDQUYrm : S3DI<0xF0, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src),
                       "vlddqu\t{$src, $dst|$dst, $src}",
                       [(set VR256:$dst, (int_x86_avx_ldu_dq_256 addr:$src))]>,
                       VEX, VEX_L;
}
def LDDQUrm : S3DI<0xF0, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src),
                   "lddqu\t{$src, $dst|$dst, $src}",
                   [(set VR128:$dst, (int_x86_sse3_ldu_dq addr:$src))],
                   IIC_SSE_LDDQU>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Arithmetic
//===---------------------------------------------------------------------===//

multiclass sse3_addsub<Intrinsic Int, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, OpndItins itins,
                       PatFrag ld_frag, bit Is2Addr = 1> {
  def rr : I<0xD0, MRMSrcReg,
       (outs RC:$dst), (ins RC:$src1, RC:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, RC:$src2))], itins.rr>,
       Sched<[itins.Sched]>;
  def rm : I<0xD0, MRMSrcMem,
       (outs RC:$dst), (ins RC:$src1, x86memop:$src2),
       !if(Is2Addr,
           !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
           !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
       [(set RC:$dst, (Int RC:$src1, (ld_frag addr:$src2)))], itins.rm>,
       Sched<[itins.Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
  let ExeDomain = SSEPackedSingle in {
    defm VADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "vaddsubps", VR128,
                                 f128mem, SSE_ALU_F32P, loadv4f32, 0>, XD, VEX_4V;
    defm VADDSUBPSY : sse3_addsub<int_x86_avx_addsub_ps_256, "vaddsubps", VR256,
                                  f256mem, SSE_ALU_F32P, loadv8f32, 0>, XD, VEX_4V, VEX_L;
  }
  let ExeDomain = SSEPackedDouble in {
    defm VADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "vaddsubpd", VR128,
                                 f128mem, SSE_ALU_F64P, loadv2f64, 0>, PD, VEX_4V;
    defm VADDSUBPDY : sse3_addsub<int_x86_avx_addsub_pd_256, "vaddsubpd", VR256,
                                  f256mem, SSE_ALU_F64P, loadv4f64, 0>, PD, VEX_4V, VEX_L;
  }
}
let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
  let ExeDomain = SSEPackedSingle in
  defm ADDSUBPS : sse3_addsub<int_x86_sse3_addsub_ps, "addsubps", VR128,
                              f128mem, SSE_ALU_F32P, memopv4f32>, XD;
  let ExeDomain = SSEPackedDouble in
  defm ADDSUBPD : sse3_addsub<int_x86_sse3_addsub_pd, "addsubpd", VR128,
                              f128mem, SSE_ALU_F64P, memopv2f64>, PD;
}
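
// Illustrative note (not part of the original file): addsub alternates
// subtract and add across lanes. For addsubps,
//   dst = {a0-b0, a1+b1, a2-b2, a3+b3}
// which is the shape complex multiplication wants, and why the X86Addsub
// patterns below exist as a combine target.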
5304} 5305 5306// Patterns used to select 'addsub' instructions. 5307let Predicates = [HasAVX] in { 5308 def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), 5309 (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>; 5310 def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (loadv4f32 addr:$rhs))), 5311 (VADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; 5312 def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), 5313 (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>; 5314 def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (loadv2f64 addr:$rhs))), 5315 (VADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; 5316 5317 def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (v8f32 VR256:$rhs))), 5318 (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>; 5319 def : Pat<(v8f32 (X86Addsub (v8f32 VR256:$lhs), (loadv8f32 addr:$rhs))), 5320 (VADDSUBPSYrm VR256:$lhs, f256mem:$rhs)>; 5321 def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (v4f64 VR256:$rhs))), 5322 (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>; 5323 def : Pat<(v4f64 (X86Addsub (v4f64 VR256:$lhs), (loadv4f64 addr:$rhs))), 5324 (VADDSUBPDYrm VR256:$lhs, f256mem:$rhs)>; 5325} 5326 5327let Predicates = [UseSSE3] in { 5328 def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (v4f32 VR128:$rhs))), 5329 (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>; 5330 def : Pat<(v4f32 (X86Addsub (v4f32 VR128:$lhs), (memopv4f32 addr:$rhs))), 5331 (ADDSUBPSrm VR128:$lhs, f128mem:$rhs)>; 5332 def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (v2f64 VR128:$rhs))), 5333 (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>; 5334 def : Pat<(v2f64 (X86Addsub (v2f64 VR128:$lhs), (memopv2f64 addr:$rhs))), 5335 (ADDSUBPDrm VR128:$lhs, f128mem:$rhs)>; 5336} 5337 5338//===---------------------------------------------------------------------===// 5339// SSE3 Instructions 5340//===---------------------------------------------------------------------===// 5341 5342// Horizontal ops 5343multiclass S3D_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5344 X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, 5345 bit Is2Addr = 1> { 5346 def rr : S3DI<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5347 !if(Is2Addr, 5348 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5349 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5350 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, 5351 Sched<[WriteFAdd]>; 5352 5353 def rm : S3DI<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5354 !if(Is2Addr, 5355 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5356 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5357 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], 5358 IIC_SSE_HADDSUB_RM>, Sched<[WriteFAddLd, ReadAfterLd]>; 5359} 5360multiclass S3_Int<bits<8> o, string OpcodeStr, ValueType vt, RegisterClass RC, 5361 X86MemOperand x86memop, SDNode OpNode, PatFrag ld_frag, 5362 bit Is2Addr = 1> { 5363 def rr : S3I<o, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), 5364 !if(Is2Addr, 5365 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5366 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5367 [(set RC:$dst, (vt (OpNode RC:$src1, RC:$src2)))], IIC_SSE_HADDSUB_RR>, 5368 Sched<[WriteFAdd]>; 5369 5370 def rm : S3I<o, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, x86memop:$src2), 5371 !if(Is2Addr, 5372 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5373 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5374 [(set RC:$dst, (vt (OpNode RC:$src1, (ld_frag addr:$src2))))], 5375 IIC_SSE_HADDSUB_RM>, 
Sched<[WriteFAddLd, ReadAfterLd]>; 5376} 5377 5378let Predicates = [HasAVX] in { 5379 let ExeDomain = SSEPackedSingle in { 5380 defm VHADDPS : S3D_Int<0x7C, "vhaddps", v4f32, VR128, f128mem, 5381 X86fhadd, loadv4f32, 0>, VEX_4V; 5382 defm VHSUBPS : S3D_Int<0x7D, "vhsubps", v4f32, VR128, f128mem, 5383 X86fhsub, loadv4f32, 0>, VEX_4V; 5384 defm VHADDPSY : S3D_Int<0x7C, "vhaddps", v8f32, VR256, f256mem, 5385 X86fhadd, loadv8f32, 0>, VEX_4V, VEX_L; 5386 defm VHSUBPSY : S3D_Int<0x7D, "vhsubps", v8f32, VR256, f256mem, 5387 X86fhsub, loadv8f32, 0>, VEX_4V, VEX_L; 5388 } 5389 let ExeDomain = SSEPackedDouble in { 5390 defm VHADDPD : S3_Int <0x7C, "vhaddpd", v2f64, VR128, f128mem, 5391 X86fhadd, loadv2f64, 0>, VEX_4V; 5392 defm VHSUBPD : S3_Int <0x7D, "vhsubpd", v2f64, VR128, f128mem, 5393 X86fhsub, loadv2f64, 0>, VEX_4V; 5394 defm VHADDPDY : S3_Int <0x7C, "vhaddpd", v4f64, VR256, f256mem, 5395 X86fhadd, loadv4f64, 0>, VEX_4V, VEX_L; 5396 defm VHSUBPDY : S3_Int <0x7D, "vhsubpd", v4f64, VR256, f256mem, 5397 X86fhsub, loadv4f64, 0>, VEX_4V, VEX_L; 5398 } 5399} 5400 5401let Constraints = "$src1 = $dst" in { 5402 let ExeDomain = SSEPackedSingle in { 5403 defm HADDPS : S3D_Int<0x7C, "haddps", v4f32, VR128, f128mem, X86fhadd, 5404 memopv4f32>; 5405 defm HSUBPS : S3D_Int<0x7D, "hsubps", v4f32, VR128, f128mem, X86fhsub, 5406 memopv4f32>; 5407 } 5408 let ExeDomain = SSEPackedDouble in { 5409 defm HADDPD : S3_Int<0x7C, "haddpd", v2f64, VR128, f128mem, X86fhadd, 5410 memopv2f64>; 5411 defm HSUBPD : S3_Int<0x7D, "hsubpd", v2f64, VR128, f128mem, X86fhsub, 5412 memopv2f64>; 5413 } 5414} 5415 5416//===---------------------------------------------------------------------===// 5417// SSSE3 - Packed Absolute Instructions 5418//===---------------------------------------------------------------------===// 5419 5420 5421/// SS3I_unop_rm_int - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 5422multiclass SS3I_unop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, 5423 PatFrag ld_frag> { 5424 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 5425 (ins VR128:$src), 5426 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5427 [(set VR128:$dst, (IntId128 VR128:$src))], IIC_SSE_PABS_RR>, 5428 Sched<[WriteVecALU]>; 5429 5430 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 5431 (ins i128mem:$src), 5432 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5433 [(set VR128:$dst, 5434 (IntId128 5435 (bitconvert (ld_frag addr:$src))))], IIC_SSE_PABS_RM>, 5436 Sched<[WriteVecALULd]>; 5437} 5438 5439/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. 5440multiclass SS3I_unop_rm_int_y<bits<8> opc, string OpcodeStr, 5441 Intrinsic IntId256> { 5442 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 5443 (ins VR256:$src), 5444 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5445 [(set VR256:$dst, (IntId256 VR256:$src))]>, 5446 Sched<[WriteVecALU]>; 5447 5448 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 5449 (ins i256mem:$src), 5450 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 5451 [(set VR256:$dst, 5452 (IntId256 5453 (bitconvert (loadv4i64 addr:$src))))]>, 5454 Sched<[WriteVecALULd]>; 5455} 5456 5457// Helper fragments to match sext vXi1 to vXiY. 
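// The PABS selection patterns below use these fragments to recognize the
// standard branch-free absolute-value idiom. A minimal C-style sketch of the
// scalar form (illustration only; the patterns apply it lane-wise):
//   int32_t abs32(int32_t x) {
//     int32_t m = x >> 31;   // sign mask: 0 when x >= 0, -1 when x < 0
//     return (x + m) ^ m;    // equals x when m == 0, -x when m == -1
//   }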
5458def v16i1sextv16i8 : PatLeaf<(v16i8 (X86pcmpgt (bc_v16i8 (v4i32 immAllZerosV)), 5459 VR128:$src))>; 5460def v8i1sextv8i16 : PatLeaf<(v8i16 (X86vsrai VR128:$src, (i8 15)))>; 5461def v4i1sextv4i32 : PatLeaf<(v4i32 (X86vsrai VR128:$src, (i8 31)))>; 5462def v32i1sextv32i8 : PatLeaf<(v32i8 (X86pcmpgt (bc_v32i8 (v8i32 immAllZerosV)), 5463 VR256:$src))>; 5464def v16i1sextv16i16: PatLeaf<(v16i16 (X86vsrai VR256:$src, (i8 15)))>; 5465def v8i1sextv8i32 : PatLeaf<(v8i32 (X86vsrai VR256:$src, (i8 31)))>; 5466 5467let Predicates = [HasAVX] in { 5468 defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", int_x86_ssse3_pabs_b_128, 5469 loadv2i64>, VEX; 5470 defm VPABSW : SS3I_unop_rm_int<0x1D, "vpabsw", int_x86_ssse3_pabs_w_128, 5471 loadv2i64>, VEX; 5472 defm VPABSD : SS3I_unop_rm_int<0x1E, "vpabsd", int_x86_ssse3_pabs_d_128, 5473 loadv2i64>, VEX; 5474 5475 def : Pat<(xor 5476 (bc_v2i64 (v16i1sextv16i8)), 5477 (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), 5478 (VPABSBrr128 VR128:$src)>; 5479 def : Pat<(xor 5480 (bc_v2i64 (v8i1sextv8i16)), 5481 (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), 5482 (VPABSWrr128 VR128:$src)>; 5483 def : Pat<(xor 5484 (bc_v2i64 (v4i1sextv4i32)), 5485 (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), 5486 (VPABSDrr128 VR128:$src)>; 5487} 5488 5489let Predicates = [HasAVX2] in { 5490 defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", 5491 int_x86_avx2_pabs_b>, VEX, VEX_L; 5492 defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", 5493 int_x86_avx2_pabs_w>, VEX, VEX_L; 5494 defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", 5495 int_x86_avx2_pabs_d>, VEX, VEX_L; 5496 5497 def : Pat<(xor 5498 (bc_v4i64 (v32i1sextv32i8)), 5499 (bc_v4i64 (add (v32i8 VR256:$src), (v32i1sextv32i8)))), 5500 (VPABSBrr256 VR256:$src)>; 5501 def : Pat<(xor 5502 (bc_v4i64 (v16i1sextv16i16)), 5503 (bc_v4i64 (add (v16i16 VR256:$src), (v16i1sextv16i16)))), 5504 (VPABSWrr256 VR256:$src)>; 5505 def : Pat<(xor 5506 (bc_v4i64 (v8i1sextv8i32)), 5507 (bc_v4i64 (add (v8i32 VR256:$src), (v8i1sextv8i32)))), 5508 (VPABSDrr256 VR256:$src)>; 5509} 5510 5511defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", int_x86_ssse3_pabs_b_128, 5512 memopv2i64>; 5513defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", int_x86_ssse3_pabs_w_128, 5514 memopv2i64>; 5515defm PABSD : SS3I_unop_rm_int<0x1E, "pabsd", int_x86_ssse3_pabs_d_128, 5516 memopv2i64>; 5517 5518let Predicates = [HasSSSE3] in { 5519 def : Pat<(xor 5520 (bc_v2i64 (v16i1sextv16i8)), 5521 (bc_v2i64 (add (v16i8 VR128:$src), (v16i1sextv16i8)))), 5522 (PABSBrr128 VR128:$src)>; 5523 def : Pat<(xor 5524 (bc_v2i64 (v8i1sextv8i16)), 5525 (bc_v2i64 (add (v8i16 VR128:$src), (v8i1sextv8i16)))), 5526 (PABSWrr128 VR128:$src)>; 5527 def : Pat<(xor 5528 (bc_v2i64 (v4i1sextv4i32)), 5529 (bc_v2i64 (add (v4i32 VR128:$src), (v4i1sextv4i32)))), 5530 (PABSDrr128 VR128:$src)>; 5531} 5532 5533//===---------------------------------------------------------------------===// 5534// SSSE3 - Packed Binary Operator Instructions 5535//===---------------------------------------------------------------------===// 5536 5537let Sched = WriteVecALU in { 5538def SSE_PHADDSUBD : OpndItins< 5539 IIC_SSE_PHADDSUBD_RR, IIC_SSE_PHADDSUBD_RM 5540>; 5541def SSE_PHADDSUBSW : OpndItins< 5542 IIC_SSE_PHADDSUBSW_RR, IIC_SSE_PHADDSUBSW_RM 5543>; 5544def SSE_PHADDSUBW : OpndItins< 5545 IIC_SSE_PHADDSUBW_RR, IIC_SSE_PHADDSUBW_RM 5546>; 5547} 5548let Sched = WriteShuffle in 5549def SSE_PSHUFB : OpndItins< 5550 IIC_SSE_PSHUFB_RR, IIC_SSE_PSHUFB_RM 5551>; 5552let Sched = WriteVecALU in 5553def SSE_PSIGN : OpndItins< 5554 
IIC_SSE_PSIGN_RR, IIC_SSE_PSIGN_RM 5555>; 5556let Sched = WriteVecIMul in 5557def SSE_PMULHRSW : OpndItins< 5558 IIC_SSE_PMULHRSW, IIC_SSE_PMULHRSW 5559>; 5560 5561/// SS3I_binop_rm - Simple SSSE3 bin op 5562multiclass SS3I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 5563 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 5564 X86MemOperand x86memop, OpndItins itins, 5565 bit Is2Addr = 1> { 5566 let isCommutable = 1 in 5567 def rr : SS38I<opc, MRMSrcReg, (outs RC:$dst), 5568 (ins RC:$src1, RC:$src2), 5569 !if(Is2Addr, 5570 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5571 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5572 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))], itins.rr>, 5573 Sched<[itins.Sched]>; 5574 def rm : SS38I<opc, MRMSrcMem, (outs RC:$dst), 5575 (ins RC:$src1, x86memop:$src2), 5576 !if(Is2Addr, 5577 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5578 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5579 [(set RC:$dst, 5580 (OpVT (OpNode RC:$src1, 5581 (bitconvert (memop_frag addr:$src2)))))], itins.rm>, 5582 Sched<[itins.Sched.Folded, ReadAfterLd]>; 5583} 5584 5585/// SS3I_binop_rm_int - Simple SSSE3 bin op whose type can be v*{i8,i16,i32}. 5586multiclass SS3I_binop_rm_int<bits<8> opc, string OpcodeStr, 5587 Intrinsic IntId128, OpndItins itins, 5588 PatFrag ld_frag, bit Is2Addr = 1> { 5589 let isCommutable = 1 in 5590 def rr128 : SS38I<opc, MRMSrcReg, (outs VR128:$dst), 5591 (ins VR128:$src1, VR128:$src2), 5592 !if(Is2Addr, 5593 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5594 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5595 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 5596 Sched<[itins.Sched]>; 5597 def rm128 : SS38I<opc, MRMSrcMem, (outs VR128:$dst), 5598 (ins VR128:$src1, i128mem:$src2), 5599 !if(Is2Addr, 5600 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 5601 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 5602 [(set VR128:$dst, 5603 (IntId128 VR128:$src1, 5604 (bitconvert (ld_frag addr:$src2))))]>, 5605 Sched<[itins.Sched.Folded, ReadAfterLd]>; 5606} 5607 5608multiclass SS3I_binop_rm_int_y<bits<8> opc, string OpcodeStr, 5609 Intrinsic IntId256, 5610 X86FoldableSchedWrite Sched> { 5611 let isCommutable = 1 in 5612 def rr256 : SS38I<opc, MRMSrcReg, (outs VR256:$dst), 5613 (ins VR256:$src1, VR256:$src2), 5614 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5615 [(set VR256:$dst, (IntId256 VR256:$src1, VR256:$src2))]>, 5616 Sched<[Sched]>; 5617 def rm256 : SS38I<opc, MRMSrcMem, (outs VR256:$dst), 5618 (ins VR256:$src1, i256mem:$src2), 5619 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 5620 [(set VR256:$dst, 5621 (IntId256 VR256:$src1, (bitconvert (loadv4i64 addr:$src2))))]>, 5622 Sched<[Sched.Folded, ReadAfterLd]>; 5623} 5624 5625let ImmT = NoImm, Predicates = [HasAVX] in { 5626let isCommutable = 0 in { 5627 defm VPHADDW : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v8i16, VR128, 5628 loadv2i64, i128mem, 5629 SSE_PHADDSUBW, 0>, VEX_4V; 5630 defm VPHADDD : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v4i32, VR128, 5631 loadv2i64, i128mem, 5632 SSE_PHADDSUBD, 0>, VEX_4V; 5633 defm VPHSUBW : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v8i16, VR128, 5634 loadv2i64, i128mem, 5635 SSE_PHADDSUBW, 0>, VEX_4V; 5636 defm VPHSUBD : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v4i32, VR128, 5637 loadv2i64, i128mem, 5638 SSE_PHADDSUBD, 0>, VEX_4V; 5639 defm VPSIGNB : SS3I_binop_rm<0x08, "vpsignb", X86psign, 
                                  v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGNW    : SS3I_binop_rm<0x09, "vpsignw", X86psign, v8i16, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSIGND    : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v4i32, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSIGN, 0>, VEX_4V;
  defm VPSHUFB    : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v16i8, VR128,
                                  loadv2i64, i128mem,
                                  SSE_PSHUFB, 0>, VEX_4V;
  defm VPHADDSW   : SS3I_binop_rm_int<0x03, "vphaddsw",
                                      int_x86_ssse3_phadd_sw_128,
                                      SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
  defm VPHSUBSW   : SS3I_binop_rm_int<0x07, "vphsubsw",
                                      int_x86_ssse3_phsub_sw_128,
                                      SSE_PHADDSUBSW, loadv2i64, 0>, VEX_4V;
  defm VPMADDUBSW : SS3I_binop_rm_int<0x04, "vpmaddubsw",
                                      int_x86_ssse3_pmadd_ub_sw_128,
                                      SSE_PMADD, loadv2i64, 0>, VEX_4V;
}
defm VPMULHRSW    : SS3I_binop_rm_int<0x0B, "vpmulhrsw",
                                      int_x86_ssse3_pmul_hr_sw_128,
                                      SSE_PMULHRSW, loadv2i64, 0>, VEX_4V;
}

let ImmT = NoImm, Predicates = [HasAVX2] in {
let isCommutable = 0 in {
  defm VPHADDWY   : SS3I_binop_rm<0x01, "vphaddw", X86hadd, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHADDDY   : SS3I_binop_rm<0x02, "vphaddd", X86hadd, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPHSUBWY   : SS3I_binop_rm<0x05, "vphsubw", X86hsub, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBW, 0>, VEX_4V, VEX_L;
  defm VPHSUBDY   : SS3I_binop_rm<0x06, "vphsubd", X86hsub, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PHADDSUBD, 0>, VEX_4V, VEX_L;
  defm VPSIGNBY   : SS3I_binop_rm<0x08, "vpsignb", X86psign, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNWY   : SS3I_binop_rm<0x09, "vpsignw", X86psign, v16i16, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSIGNDY   : SS3I_binop_rm<0x0A, "vpsignd", X86psign, v8i32, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSIGN, 0>, VEX_4V, VEX_L;
  defm VPSHUFBY   : SS3I_binop_rm<0x00, "vpshufb", X86pshufb, v32i8, VR256,
                                  loadv4i64, i256mem,
                                  SSE_PSHUFB, 0>, VEX_4V, VEX_L;
  defm VPHADDSW   : SS3I_binop_rm_int_y<0x03, "vphaddsw",
                                        int_x86_avx2_phadd_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPHSUBSW   : SS3I_binop_rm_int_y<0x07, "vphsubsw",
                                        int_x86_avx2_phsub_sw,
                                        WriteVecALU>, VEX_4V, VEX_L;
  defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw",
                                        int_x86_avx2_pmadd_ub_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}
defm VPMULHRSW    : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw",
                                        int_x86_avx2_pmul_hr_sw,
                                        WriteVecIMul>, VEX_4V, VEX_L;
}

// None of these have i8 immediate fields.
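//
// For reference, a C-style sketch (illustration only) of the horizontal-add
// lane pairing shared by the PHADD/PHSUB forms above and below, shown here
// for phaddw on v8i16 operands a (the destination) and b (the source):
//   for (int i = 0; i < 4; ++i) dst[i]     = a[2 * i] + a[2 * i + 1];
//   for (int i = 0; i < 4; ++i) dst[i + 4] = b[2 * i] + b[2 * i + 1];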
5708let ImmT = NoImm, Constraints = "$src1 = $dst" in { 5709let isCommutable = 0 in { 5710 defm PHADDW : SS3I_binop_rm<0x01, "phaddw", X86hadd, v8i16, VR128, 5711 memopv2i64, i128mem, SSE_PHADDSUBW>; 5712 defm PHADDD : SS3I_binop_rm<0x02, "phaddd", X86hadd, v4i32, VR128, 5713 memopv2i64, i128mem, SSE_PHADDSUBD>; 5714 defm PHSUBW : SS3I_binop_rm<0x05, "phsubw", X86hsub, v8i16, VR128, 5715 memopv2i64, i128mem, SSE_PHADDSUBW>; 5716 defm PHSUBD : SS3I_binop_rm<0x06, "phsubd", X86hsub, v4i32, VR128, 5717 memopv2i64, i128mem, SSE_PHADDSUBD>; 5718 defm PSIGNB : SS3I_binop_rm<0x08, "psignb", X86psign, v16i8, VR128, 5719 memopv2i64, i128mem, SSE_PSIGN>; 5720 defm PSIGNW : SS3I_binop_rm<0x09, "psignw", X86psign, v8i16, VR128, 5721 memopv2i64, i128mem, SSE_PSIGN>; 5722 defm PSIGND : SS3I_binop_rm<0x0A, "psignd", X86psign, v4i32, VR128, 5723 memopv2i64, i128mem, SSE_PSIGN>; 5724 defm PSHUFB : SS3I_binop_rm<0x00, "pshufb", X86pshufb, v16i8, VR128, 5725 memopv2i64, i128mem, SSE_PSHUFB>; 5726 defm PHADDSW : SS3I_binop_rm_int<0x03, "phaddsw", 5727 int_x86_ssse3_phadd_sw_128, 5728 SSE_PHADDSUBSW, memopv2i64>; 5729 defm PHSUBSW : SS3I_binop_rm_int<0x07, "phsubsw", 5730 int_x86_ssse3_phsub_sw_128, 5731 SSE_PHADDSUBSW, memopv2i64>; 5732 defm PMADDUBSW : SS3I_binop_rm_int<0x04, "pmaddubsw", 5733 int_x86_ssse3_pmadd_ub_sw_128, 5734 SSE_PMADD, memopv2i64>; 5735} 5736defm PMULHRSW : SS3I_binop_rm_int<0x0B, "pmulhrsw", 5737 int_x86_ssse3_pmul_hr_sw_128, 5738 SSE_PMULHRSW, memopv2i64>; 5739} 5740 5741//===---------------------------------------------------------------------===// 5742// SSSE3 - Packed Align Instruction Patterns 5743//===---------------------------------------------------------------------===// 5744 5745multiclass ssse3_palignr<string asm, bit Is2Addr = 1> { 5746 let hasSideEffects = 0 in { 5747 def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), 5748 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 5749 !if(Is2Addr, 5750 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5751 !strconcat(asm, 5752 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5753 [], IIC_SSE_PALIGNRR>, Sched<[WriteShuffle]>; 5754 let mayLoad = 1 in 5755 def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), 5756 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 5757 !if(Is2Addr, 5758 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 5759 !strconcat(asm, 5760 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 5761 [], IIC_SSE_PALIGNRM>, Sched<[WriteShuffleLd, ReadAfterLd]>; 5762 } 5763} 5764 5765multiclass ssse3_palignr_y<string asm, bit Is2Addr = 1> { 5766 let hasSideEffects = 0 in { 5767 def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), 5768 (ins VR256:$src1, VR256:$src2, u8imm:$src3), 5769 !strconcat(asm, 5770 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5771 []>, Sched<[WriteShuffle]>; 5772 let mayLoad = 1 in 5773 def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), 5774 (ins VR256:$src1, i256mem:$src2, u8imm:$src3), 5775 !strconcat(asm, 5776 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), 5777 []>, Sched<[WriteShuffleLd, ReadAfterLd]>; 5778 } 5779} 5780 5781let Predicates = [HasAVX] in 5782 defm VPALIGN : ssse3_palignr<"vpalignr", 0>, VEX_4V; 5783let Predicates = [HasAVX2] in 5784 defm VPALIGN : ssse3_palignr_y<"vpalignr", 0>, VEX_4V, VEX_L; 5785let Constraints = "$src1 = $dst", Predicates = [UseSSSE3] in 5786 defm PALIGN : ssse3_palignr<"palignr">; 5787 5788let Predicates = [HasAVX2] in { 5789def : Pat<(v8i32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 
                                                        imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v8f32 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v16i16 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
def : Pat<(v32i8 (X86PAlignr VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPALIGNR256rr VR256:$src2, VR256:$src1, imm:$imm)>;
}

let Predicates = [HasAVX] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (VPALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

let Predicates = [UseSSSE3] in {
def : Pat<(v4i32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v4f32 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v8i16 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
def : Pat<(v16i8 (X86PAlignr VR128:$src1, VR128:$src2, (i8 imm:$imm))),
          (PALIGNR128rr VR128:$src2, VR128:$src1, imm:$imm)>;
}

//===---------------------------------------------------------------------===//
// SSE3 - Thread synchronization
//===---------------------------------------------------------------------===//

let SchedRW = [WriteSystem] in {
let usesCustomInserter = 1 in {
def MONITOR : PseudoI<(outs), (ins i32mem:$src1, GR32:$src2, GR32:$src3),
                      [(int_x86_sse3_monitor addr:$src1, GR32:$src2, GR32:$src3)]>,
                      Requires<[HasSSE3]>;
}

let Uses = [EAX, ECX, EDX] in
def MONITORrrr : I<0x01, MRM_C8, (outs), (ins), "monitor", [], IIC_SSE_MONITOR>,
                   TB, Requires<[HasSSE3]>;
let Uses = [ECX, EAX] in
def MWAITrr : I<0x01, MRM_C9, (outs), (ins), "mwait",
                [(int_x86_sse3_mwait ECX, EAX)], IIC_SSE_MWAIT>,
                TB, Requires<[HasSSE3]>;
} // SchedRW

def : InstAlias<"mwait\t{%eax, %ecx|ecx, eax}", (MWAITrr)>, Requires<[Not64BitMode]>;
def : InstAlias<"mwait\t{%rax, %rcx|rcx, rax}", (MWAITrr)>, Requires<[In64BitMode]>;

def : InstAlias<"monitor\t{%eax, %ecx, %edx|edx, ecx, eax}", (MONITORrrr)>,
      Requires<[Not64BitMode]>;
def : InstAlias<"monitor\t{%rax, %rcx, %rdx|rdx, rcx, rax}", (MONITORrrr)>,
      Requires<[In64BitMode]>;

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Move with Sign/Zero Extend
//===----------------------------------------------------------------------===//

multiclass SS41I_pmovx_rrrm<bits<8> opc, string OpcodeStr, X86MemOperand MemOp,
                            RegisterClass OutRC, RegisterClass InRC,
                            OpndItins itins> {
  def rr : SS48I<opc, MRMSrcReg, (outs OutRC:$dst), (ins InRC:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                 [], itins.rr>,
                 Sched<[itins.Sched]>;

  def rm : SS48I<opc, MRMSrcMem, (outs OutRC:$dst), (ins MemOp:$src),
                 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
5863 [], 5864 itins.rm>, Sched<[itins.Sched.Folded]>; 5865} 5866 5867multiclass SS41I_pmovx_rm_all<bits<8> opc, string OpcodeStr, 5868 X86MemOperand MemOp, X86MemOperand MemYOp, 5869 OpndItins SSEItins, OpndItins AVXItins, 5870 OpndItins AVX2Itins> { 5871 defm NAME : SS41I_pmovx_rrrm<opc, OpcodeStr, MemOp, VR128, VR128, SSEItins>; 5872 let Predicates = [HasAVX] in 5873 defm V#NAME : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemOp, 5874 VR128, VR128, AVXItins>, VEX; 5875 let Predicates = [HasAVX2] in 5876 defm V#NAME#Y : SS41I_pmovx_rrrm<opc, !strconcat("v", OpcodeStr), MemYOp, 5877 VR256, VR128, AVX2Itins>, VEX, VEX_L; 5878} 5879 5880multiclass SS41I_pmovx_rm<bits<8> opc, string OpcodeStr, 5881 X86MemOperand MemOp, X86MemOperand MemYOp> { 5882 defm PMOVSX#NAME : SS41I_pmovx_rm_all<opc, !strconcat("pmovsx", OpcodeStr), 5883 MemOp, MemYOp, 5884 SSE_INTALU_ITINS_SHUFF_P, 5885 DEFAULT_ITINS_SHUFFLESCHED, 5886 DEFAULT_ITINS_SHUFFLESCHED>; 5887 defm PMOVZX#NAME : SS41I_pmovx_rm_all<!add(opc, 0x10), 5888 !strconcat("pmovzx", OpcodeStr), 5889 MemOp, MemYOp, 5890 SSE_INTALU_ITINS_SHUFF_P, 5891 DEFAULT_ITINS_SHUFFLESCHED, 5892 DEFAULT_ITINS_SHUFFLESCHED>; 5893} 5894 5895defm BW : SS41I_pmovx_rm<0x20, "bw", i64mem, i128mem>; 5896defm WD : SS41I_pmovx_rm<0x23, "wd", i64mem, i128mem>; 5897defm DQ : SS41I_pmovx_rm<0x25, "dq", i64mem, i128mem>; 5898 5899defm BD : SS41I_pmovx_rm<0x21, "bd", i32mem, i64mem>; 5900defm WQ : SS41I_pmovx_rm<0x24, "wq", i32mem, i64mem>; 5901 5902defm BQ : SS41I_pmovx_rm<0x22, "bq", i16mem, i32mem>; 5903 5904// AVX2 Patterns 5905multiclass SS41I_pmovx_avx2_patterns<string OpcPrefix, string ExtTy, SDNode ExtOp> { 5906 // Register-Register patterns 5907 def : Pat<(v16i16 (ExtOp (v16i8 VR128:$src))), 5908 (!cast<I>(OpcPrefix#BWYrr) VR128:$src)>; 5909 def : Pat<(v8i32 (ExtOp (v16i8 VR128:$src))), 5910 (!cast<I>(OpcPrefix#BDYrr) VR128:$src)>; 5911 def : Pat<(v4i64 (ExtOp (v16i8 VR128:$src))), 5912 (!cast<I>(OpcPrefix#BQYrr) VR128:$src)>; 5913 5914 def : Pat<(v8i32 (ExtOp (v8i16 VR128:$src))), 5915 (!cast<I>(OpcPrefix#WDYrr) VR128:$src)>; 5916 def : Pat<(v4i64 (ExtOp (v8i16 VR128:$src))), 5917 (!cast<I>(OpcPrefix#WQYrr) VR128:$src)>; 5918 5919 def : Pat<(v4i64 (ExtOp (v4i32 VR128:$src))), 5920 (!cast<I>(OpcPrefix#DQYrr) VR128:$src)>; 5921 5922 // On AVX2, we also support 256bit inputs. 
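  // Only the low 128 bits of the wide source are consumed, so each pattern
  // peels off the xmm subregister first; e.g. (v16i16 (sext (v32i8 VR256:$src)))
  // widens just the low 16 bytes, which is exactly what the VPMOVSXBW form
  // with a 256-bit destination computes.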
  def : Pat<(v16i16 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BWYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v8i32 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v4i64 (ExtOp (v32i8 VR256:$src))),
            (!cast<I>(OpcPrefix#BQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  def : Pat<(v8i32 (ExtOp (v16i16 VR256:$src))),
            (!cast<I>(OpcPrefix#WDYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;
  def : Pat<(v4i64 (ExtOp (v16i16 VR256:$src))),
            (!cast<I>(OpcPrefix#WQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  def : Pat<(v4i64 (ExtOp (v8i32 VR256:$src))),
            (!cast<I>(OpcPrefix#DQYrr) (EXTRACT_SUBREG VR256:$src, sub_xmm))>;

  // Simple Register-Memory patterns
  def : Pat<(v16i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v8i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  def : Pat<(v4i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;

  // AVX2 Register-Memory patterns
  def : Pat<(v16i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;
  def : Pat<(v16i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWYrm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQYrm) addr:$src)>;

  def : Pat<(v8i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;
  def : Pat<(v8i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16
                           (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQYrm) addr:$src)>;

  def : Pat<(v4i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
  def : Pat<(v4i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQYrm) addr:$src)>;
}

let Predicates = [HasAVX2] in {
  defm : SS41I_pmovx_avx2_patterns<"VPMOVSX", "s", X86vsext>;
  defm : SS41I_pmovx_avx2_patterns<"VPMOVZX", "z", X86vzext>;
}

// SSE4.1/AVX patterns.
multiclass SS41I_pmovx_patterns<string OpcPrefix, string ExtTy,
                                SDNode ExtOp, PatFrag ExtLoad16> {
  def : Pat<(v8i16 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BWrr) VR128:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 VR128:$src))),
            (!cast<I>(OpcPrefix#BQrr) VR128:$src)>;

  def : Pat<(v4i32 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WDrr) VR128:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 VR128:$src))),
            (!cast<I>(OpcPrefix#WQrr) VR128:$src)>;

  def : Pat<(v2i64 (ExtOp (v4i32 VR128:$src))),
            (!cast<I>(OpcPrefix#DQrr) VR128:$src)>;

  def : Pat<(v8i16 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi8") addr:$src)),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi16") addr:$src)),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (!cast<PatFrag>(ExtTy#"extloadvi32") addr:$src)),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;

  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (v16i8 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;
  def : Pat<(v8i16 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BWrm) addr:$src)>;

  def : Pat<(v4i32 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v16i8 (v4i32 (scalar_to_vector (ExtLoad16 addr:$src)))))),
            (!cast<I>(OpcPrefix#BQrm)
addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v16i8 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v16i8 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#BQrm) addr:$src)>;

  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v8i16 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;
  def : Pat<(v4i32 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WDrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v8i16 (v4i32 (scalar_to_vector (loadi32 addr:$src)))))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 (vzmovl_v4i32 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v8i16 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v8i16 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#WQrm) addr:$src)>;

  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (v2f64 (scalar_to_vector (loadf64 addr:$src)))))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v4i32 (vzmovl_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (v4i32 (vzload_v2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
  def : Pat<(v2i64 (ExtOp (bc_v4i32 (loadv2i64 addr:$src)))),
            (!cast<I>(OpcPrefix#DQrm) addr:$src)>;
}

let Predicates = [HasAVX] in {
  defm : SS41I_pmovx_patterns<"VPMOVSX", "s", X86vsext, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"VPMOVZX", "z", X86vzext, loadi16_anyext>;
}

let Predicates = [UseSSE41] in {
  defm : SS41I_pmovx_patterns<"PMOVSX", "s", X86vsext, extloadi32i16>;
  defm : SS41I_pmovx_patterns<"PMOVZX", "z", X86vzext, loadi16_anyext>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Extract Instructions
//===----------------------------------------------------------------------===//

/// SS41I_extract8 - SSE 4.1 extract 8 bits to 32 bit reg or 8 bit mem
multiclass SS41I_extract8<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst, (X86pextrb (v16i8 VR128:$src1),
                                         imm:$src2))]>,
                  Sched<[WriteShuffle]>;
  let hasSideEffects = 0, mayStore = 1,
      SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i8mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i8 (trunc (assertzext (X86pextrb (v16i8 VR128:$src1),
                                                 imm:$src2)))), addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRB : SS41I_extract8<0x14, "vpextrb">, VEX;

defm PEXTRB :
SS41I_extract8<0x14, "pextrb">;


/// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination
multiclass SS41I_extract16<bits<8> opc, string OpcodeStr> {
  let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
  def rr_REV : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                   (ins VR128:$src1, u8imm:$src2),
                   !strconcat(OpcodeStr,
                              "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   []>, Sched<[WriteShuffle]>;

  let hasSideEffects = 0, mayStore = 1,
      SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i16mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (i16 (trunc (assertzext (X86pextrw (v8i16 VR128:$src1),
                                                  imm:$src2)))), addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRW : SS41I_extract16<0x15, "vpextrw">, VEX;

defm PEXTRW : SS41I_extract16<0x15, "pextrw">;


/// SS41I_extract32 - SSE 4.1 extract 32 bits to int reg or memory destination
multiclass SS41I_extract32<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32:$dst,
                  (extractelt (v4i32 VR128:$src1), imm:$src2))]>,
                  Sched<[WriteShuffle]>;
  let SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i32mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v4i32 VR128:$src1), imm:$src2),
                          addr:$dst)]>;
}

let Predicates = [HasAVX] in
  defm VPEXTRD : SS41I_extract32<0x16, "vpextrd">, VEX;

defm PEXTRD : SS41I_extract32<0x16, "pextrd">;

/// SS41I_extract64 - SSE 4.1 extract 64 bits to int reg or memory destination
multiclass SS41I_extract64<bits<8> opc, string OpcodeStr> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR64:$dst,
                  (extractelt (v2i64 VR128:$src1), imm:$src2))]>,
                  Sched<[WriteShuffle]>, REX_W;
  let SchedRW = [WriteShuffleLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins i64mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store (extractelt (v2i64 VR128:$src1), imm:$src2),
                          addr:$dst)]>, REX_W;
}

let Predicates = [HasAVX] in
  defm VPEXTRQ : SS41I_extract64<0x16, "vpextrq">, VEX, VEX_W;

defm PEXTRQ : SS41I_extract64<0x16, "pextrq">;

/// SS41I_extractf32 - SSE 4.1 extract 32 bits fp value to int reg or memory
/// destination
multiclass SS41I_extractf32<bits<8> opc, string OpcodeStr,
                            OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMDestReg, (outs GR32orGR64:$dst),
                 (ins VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(set GR32orGR64:$dst,
                    (extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2))],
                    itins.rr>, Sched<[WriteFBlend]>;
  let SchedRW = [WriteFBlendLd, WriteRMW] in
  def mr : SS4AIi8<opc, MRMDestMem, (outs),
                 (ins f32mem:$dst, VR128:$src1, u8imm:$src2),
                 !strconcat(OpcodeStr,
                            "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                 [(store
(extractelt (bc_v4i32 (v4f32 VR128:$src1)), imm:$src2), 6236 addr:$dst)], itins.rm>; 6237} 6238 6239let ExeDomain = SSEPackedSingle in { 6240 let Predicates = [UseAVX] in 6241 defm VEXTRACTPS : SS41I_extractf32<0x17, "vextractps">, VEX; 6242 defm EXTRACTPS : SS41I_extractf32<0x17, "extractps", SSE_EXTRACT_ITINS>; 6243} 6244 6245// Also match an EXTRACTPS store when the store is done as f32 instead of i32. 6246def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), 6247 imm:$src2))), 6248 addr:$dst), 6249 (VEXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, 6250 Requires<[HasAVX]>; 6251def : Pat<(store (f32 (bitconvert (extractelt (bc_v4i32 (v4f32 VR128:$src1)), 6252 imm:$src2))), 6253 addr:$dst), 6254 (EXTRACTPSmr addr:$dst, VR128:$src1, imm:$src2)>, 6255 Requires<[UseSSE41]>; 6256 6257//===----------------------------------------------------------------------===// 6258// SSE4.1 - Insert Instructions 6259//===----------------------------------------------------------------------===// 6260 6261multiclass SS41I_insert8<bits<8> opc, string asm, bit Is2Addr = 1> { 6262 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6263 (ins VR128:$src1, GR32orGR64:$src2, u8imm:$src3), 6264 !if(Is2Addr, 6265 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6266 !strconcat(asm, 6267 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6268 [(set VR128:$dst, 6269 (X86pinsrb VR128:$src1, GR32orGR64:$src2, imm:$src3))]>, 6270 Sched<[WriteShuffle]>; 6271 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6272 (ins VR128:$src1, i8mem:$src2, u8imm:$src3), 6273 !if(Is2Addr, 6274 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6275 !strconcat(asm, 6276 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6277 [(set VR128:$dst, 6278 (X86pinsrb VR128:$src1, (extloadi8 addr:$src2), 6279 imm:$src3))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; 6280} 6281 6282let Predicates = [HasAVX] in 6283 defm VPINSRB : SS41I_insert8<0x20, "vpinsrb", 0>, VEX_4V; 6284let Constraints = "$src1 = $dst" in 6285 defm PINSRB : SS41I_insert8<0x20, "pinsrb">; 6286 6287multiclass SS41I_insert32<bits<8> opc, string asm, bit Is2Addr = 1> { 6288 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6289 (ins VR128:$src1, GR32:$src2, u8imm:$src3), 6290 !if(Is2Addr, 6291 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6292 !strconcat(asm, 6293 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6294 [(set VR128:$dst, 6295 (v4i32 (insertelt VR128:$src1, GR32:$src2, imm:$src3)))]>, 6296 Sched<[WriteShuffle]>; 6297 def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst), 6298 (ins VR128:$src1, i32mem:$src2, u8imm:$src3), 6299 !if(Is2Addr, 6300 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6301 !strconcat(asm, 6302 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6303 [(set VR128:$dst, 6304 (v4i32 (insertelt VR128:$src1, (loadi32 addr:$src2), 6305 imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>; 6306} 6307 6308let Predicates = [HasAVX] in 6309 defm VPINSRD : SS41I_insert32<0x22, "vpinsrd", 0>, VEX_4V; 6310let Constraints = "$src1 = $dst" in 6311 defm PINSRD : SS41I_insert32<0x22, "pinsrd">; 6312 6313multiclass SS41I_insert64<bits<8> opc, string asm, bit Is2Addr = 1> { 6314 def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst), 6315 (ins VR128:$src1, GR64:$src2, u8imm:$src3), 6316 !if(Is2Addr, 6317 !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6318 !strconcat(asm, 6319 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, GR64:$src2, imm:$src3)))]>,
      Sched<[WriteShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, i64mem:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(asm,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (v2i64 (insertelt VR128:$src1, (loadi64 addr:$src2),
                           imm:$src3)))]>, Sched<[WriteShuffleLd, ReadAfterLd]>;
}

let Predicates = [HasAVX] in
  defm VPINSRQ : SS41I_insert64<0x22, "vpinsrq", 0>, VEX_4V, VEX_W;
let Constraints = "$src1 = $dst" in
  defm PINSRQ : SS41I_insert64<0x22, "pinsrq">, REX_W;

// insertps has a few different modes. The first two forms below are optimized
// inserts that never zero arbitrary elements in the destination vector; the
// intrinsic form that follows may zero arbitrary elements in the target
// vector.
multiclass SS41I_insertf32<bits<8> opc, string asm, bit Is2Addr = 1,
                           OpndItins itins = DEFAULT_ITINS> {
  def rr : SS4AIi8<opc, MRMSrcReg, (outs VR128:$dst),
      (ins VR128:$src1, VR128:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(asm,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1, VR128:$src2, imm:$src3))], itins.rr>,
      Sched<[WriteFShuffle]>;
  def rm : SS4AIi8<opc, MRMSrcMem, (outs VR128:$dst),
      (ins VR128:$src1, f32mem:$src2, u8imm:$src3),
      !if(Is2Addr,
          !strconcat(asm, "\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(asm,
                     "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst,
        (X86insertps VR128:$src1,
                   (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                    imm:$src3))], itins.rm>,
      Sched<[WriteFShuffleLd, ReadAfterLd]>;
}

let ExeDomain = SSEPackedSingle in {
  let Predicates = [UseAVX] in
    defm VINSERTPS : SS41I_insertf32<0x21, "vinsertps", 0>, VEX_4V;
  let Constraints = "$src1 = $dst" in
    defm INSERTPS : SS41I_insertf32<0x21, "insertps", 1, SSE_INSERT_ITINS>;
}

let Predicates = [UseSSE41] in {
  // If we're inserting an element from a load or a null pshuf of a load,
  // fold the load into the insertps instruction.
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd (v4f32
                    (scalar_to_vector (loadf32 addr:$src2))), (i8 0)),
                    imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1), (X86PShufd
                    (loadv4f32 addr:$src2), (i8 0)), imm:$src3)),
            (INSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}

let Predicates = [UseAVX] in {
  // If we're inserting an element from a vbroadcast of a load, fold the
  // load into the X86insertps instruction.
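  // This is safe because when the load is folded, insertps reads only
  // element 0 of its second source, and element 0 of a broadcast is the
  // loaded scalar itself, so (X86VBroadcast (loadf32 addr)) and
  // (scalar_to_vector (loadf32 addr)) agree in the lane that matters.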
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                    (X86VBroadcast (loadf32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
  def : Pat<(v4f32 (X86insertps (v4f32 VR128:$src1),
                    (X86VBroadcast (loadv4f32 addr:$src2)), imm:$src3)),
            (VINSERTPSrm VR128:$src1, addr:$src2, imm:$src3)>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Round Instructions
//===----------------------------------------------------------------------===//

multiclass sse41_fp_unop_rm<bits<8> opcps, bits<8> opcpd, string OpcodeStr,
                            X86MemOperand x86memop, RegisterClass RC,
                            PatFrag mem_frag32, PatFrag mem_frag64,
                            Intrinsic V4F32Int, Intrinsic V2F64Int> {
let ExeDomain = SSEPackedSingle in {
  // Vector intrinsic operation, reg
  def PSr : SS4AIi8<opcps, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V4F32Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPS_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  def PSm : SS4AIi8<opcps, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                    "ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V4F32Int (mem_frag32 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPS_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedSingle

let ExeDomain = SSEPackedDouble in {
  // Vector intrinsic operation, reg
  def PDr : SS4AIi8<opcpd, MRMSrcReg,
                    (outs RC:$dst), (ins RC:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst, (V2F64Int RC:$src1, imm:$src2))],
                    IIC_SSE_ROUNDPD_REG>, Sched<[WriteFAdd]>;

  // Vector intrinsic operation, mem
  def PDm : SS4AIi8<opcpd, MRMSrcMem,
                    (outs RC:$dst), (ins x86memop:$src1, i32u8imm:$src2),
                    !strconcat(OpcodeStr,
                    "pd\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                    [(set RC:$dst,
                          (V2F64Int (mem_frag64 addr:$src1),imm:$src2))],
                    IIC_SSE_ROUNDPD_MEM>, Sched<[WriteFAddLd]>;
} // ExeDomain = SSEPackedDouble
}

multiclass sse41_fp_binop_rm<bits<8> opcss, bits<8> opcsd,
                             string OpcodeStr,
                             Intrinsic F32Int,
                             Intrinsic F64Int, bit Is2Addr = 1> {
let ExeDomain = GenericDomain in {
  // Operation, reg.
  let hasSideEffects = 0 in
  def SSr : SS4AIi8<opcss, MRMSrcReg,
      (outs FR32:$dst), (ins FR32:$src1, FR32:$src2, i32u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      []>, Sched<[WriteFAdd]>;

  // Intrinsic operation, reg.
  let isCodeGenOnly = 1 in
  def SSr_Int : SS4AIi8<opcss, MRMSrcReg,
      (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3),
      !if(Is2Addr,
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"),
          !strconcat(OpcodeStr,
              "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")),
      [(set VR128:$dst, (F32Int VR128:$src1, VR128:$src2, imm:$src3))]>,
      Sched<[WriteFAdd]>;

  // Intrinsic operation, mem.
6474 def SSm : SS4AIi8<opcss, MRMSrcMem, 6475 (outs VR128:$dst), (ins VR128:$src1, ssmem:$src2, i32u8imm:$src3), 6476 !if(Is2Addr, 6477 !strconcat(OpcodeStr, 6478 "ss\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6479 !strconcat(OpcodeStr, 6480 "ss\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6481 [(set VR128:$dst, 6482 (F32Int VR128:$src1, sse_load_f32:$src2, imm:$src3))]>, 6483 Sched<[WriteFAddLd, ReadAfterLd]>; 6484 6485 // Operation, reg. 6486 let hasSideEffects = 0 in 6487 def SDr : SS4AIi8<opcsd, MRMSrcReg, 6488 (outs FR64:$dst), (ins FR64:$src1, FR64:$src2, i32u8imm:$src3), 6489 !if(Is2Addr, 6490 !strconcat(OpcodeStr, 6491 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6492 !strconcat(OpcodeStr, 6493 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6494 []>, Sched<[WriteFAdd]>; 6495 6496 // Intrinsic operation, reg. 6497 let isCodeGenOnly = 1 in 6498 def SDr_Int : SS4AIi8<opcsd, MRMSrcReg, 6499 (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i32u8imm:$src3), 6500 !if(Is2Addr, 6501 !strconcat(OpcodeStr, 6502 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6503 !strconcat(OpcodeStr, 6504 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6505 [(set VR128:$dst, (F64Int VR128:$src1, VR128:$src2, imm:$src3))]>, 6506 Sched<[WriteFAdd]>; 6507 6508 // Intrinsic operation, mem. 6509 def SDm : SS4AIi8<opcsd, MRMSrcMem, 6510 (outs VR128:$dst), (ins VR128:$src1, sdmem:$src2, i32u8imm:$src3), 6511 !if(Is2Addr, 6512 !strconcat(OpcodeStr, 6513 "sd\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6514 !strconcat(OpcodeStr, 6515 "sd\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6516 [(set VR128:$dst, 6517 (F64Int VR128:$src1, sse_load_f64:$src2, imm:$src3))]>, 6518 Sched<[WriteFAddLd, ReadAfterLd]>; 6519} // ExeDomain = GenericDomain 6520} 6521 6522// FP round - roundss, roundps, roundsd, roundpd 6523let Predicates = [HasAVX] in { 6524 // Intrinsic form 6525 defm VROUND : sse41_fp_unop_rm<0x08, 0x09, "vround", f128mem, VR128, 6526 loadv4f32, loadv2f64, 6527 int_x86_sse41_round_ps, 6528 int_x86_sse41_round_pd>, VEX; 6529 defm VROUNDY : sse41_fp_unop_rm<0x08, 0x09, "vround", f256mem, VR256, 6530 loadv8f32, loadv4f64, 6531 int_x86_avx_round_ps_256, 6532 int_x86_avx_round_pd_256>, VEX, VEX_L; 6533 defm VROUND : sse41_fp_binop_rm<0x0A, 0x0B, "vround", 6534 int_x86_sse41_round_ss, 6535 int_x86_sse41_round_sd, 0>, VEX_4V, VEX_LIG; 6536} 6537 6538let Predicates = [UseAVX] in { 6539 def : Pat<(ffloor FR32:$src), 6540 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; 6541 def : Pat<(f64 (ffloor FR64:$src)), 6542 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; 6543 def : Pat<(f32 (fnearbyint FR32:$src)), 6544 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; 6545 def : Pat<(f64 (fnearbyint FR64:$src)), 6546 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; 6547 def : Pat<(f32 (fceil FR32:$src)), 6548 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; 6549 def : Pat<(f64 (fceil FR64:$src)), 6550 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; 6551 def : Pat<(f32 (frint FR32:$src)), 6552 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; 6553 def : Pat<(f64 (frint FR64:$src)), 6554 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; 6555 def : Pat<(f32 (ftrunc FR32:$src)), 6556 (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; 6557 def : Pat<(f64 (ftrunc FR64:$src)), 6558 (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; 6559} 6560 6561let Predicates = [HasAVX] in { 6562 def : Pat<(v4f32 (ffloor 
VR128:$src)), 6563 (VROUNDPSr VR128:$src, (i32 0x1))>; 6564 def : Pat<(v4f32 (fnearbyint VR128:$src)), 6565 (VROUNDPSr VR128:$src, (i32 0xC))>; 6566 def : Pat<(v4f32 (fceil VR128:$src)), 6567 (VROUNDPSr VR128:$src, (i32 0x2))>; 6568 def : Pat<(v4f32 (frint VR128:$src)), 6569 (VROUNDPSr VR128:$src, (i32 0x4))>; 6570 def : Pat<(v4f32 (ftrunc VR128:$src)), 6571 (VROUNDPSr VR128:$src, (i32 0x3))>; 6572 6573 def : Pat<(v2f64 (ffloor VR128:$src)), 6574 (VROUNDPDr VR128:$src, (i32 0x1))>; 6575 def : Pat<(v2f64 (fnearbyint VR128:$src)), 6576 (VROUNDPDr VR128:$src, (i32 0xC))>; 6577 def : Pat<(v2f64 (fceil VR128:$src)), 6578 (VROUNDPDr VR128:$src, (i32 0x2))>; 6579 def : Pat<(v2f64 (frint VR128:$src)), 6580 (VROUNDPDr VR128:$src, (i32 0x4))>; 6581 def : Pat<(v2f64 (ftrunc VR128:$src)), 6582 (VROUNDPDr VR128:$src, (i32 0x3))>; 6583 6584 def : Pat<(v8f32 (ffloor VR256:$src)), 6585 (VROUNDYPSr VR256:$src, (i32 0x1))>; 6586 def : Pat<(v8f32 (fnearbyint VR256:$src)), 6587 (VROUNDYPSr VR256:$src, (i32 0xC))>; 6588 def : Pat<(v8f32 (fceil VR256:$src)), 6589 (VROUNDYPSr VR256:$src, (i32 0x2))>; 6590 def : Pat<(v8f32 (frint VR256:$src)), 6591 (VROUNDYPSr VR256:$src, (i32 0x4))>; 6592 def : Pat<(v8f32 (ftrunc VR256:$src)), 6593 (VROUNDYPSr VR256:$src, (i32 0x3))>; 6594 6595 def : Pat<(v4f64 (ffloor VR256:$src)), 6596 (VROUNDYPDr VR256:$src, (i32 0x1))>; 6597 def : Pat<(v4f64 (fnearbyint VR256:$src)), 6598 (VROUNDYPDr VR256:$src, (i32 0xC))>; 6599 def : Pat<(v4f64 (fceil VR256:$src)), 6600 (VROUNDYPDr VR256:$src, (i32 0x2))>; 6601 def : Pat<(v4f64 (frint VR256:$src)), 6602 (VROUNDYPDr VR256:$src, (i32 0x4))>; 6603 def : Pat<(v4f64 (ftrunc VR256:$src)), 6604 (VROUNDYPDr VR256:$src, (i32 0x3))>; 6605} 6606 6607defm ROUND : sse41_fp_unop_rm<0x08, 0x09, "round", f128mem, VR128, 6608 memopv4f32, memopv2f64, 6609 int_x86_sse41_round_ps, int_x86_sse41_round_pd>; 6610let Constraints = "$src1 = $dst" in 6611defm ROUND : sse41_fp_binop_rm<0x0A, 0x0B, "round", 6612 int_x86_sse41_round_ss, int_x86_sse41_round_sd>; 6613 6614let Predicates = [UseSSE41] in { 6615 def : Pat<(ffloor FR32:$src), 6616 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x1))>; 6617 def : Pat<(f64 (ffloor FR64:$src)), 6618 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x1))>; 6619 def : Pat<(f32 (fnearbyint FR32:$src)), 6620 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0xC))>; 6621 def : Pat<(f64 (fnearbyint FR64:$src)), 6622 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0xC))>; 6623 def : Pat<(f32 (fceil FR32:$src)), 6624 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x2))>; 6625 def : Pat<(f64 (fceil FR64:$src)), 6626 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x2))>; 6627 def : Pat<(f32 (frint FR32:$src)), 6628 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x4))>; 6629 def : Pat<(f64 (frint FR64:$src)), 6630 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x4))>; 6631 def : Pat<(f32 (ftrunc FR32:$src)), 6632 (ROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src, (i32 0x3))>; 6633 def : Pat<(f64 (ftrunc FR64:$src)), 6634 (ROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src, (i32 0x3))>; 6635 6636 def : Pat<(v4f32 (ffloor VR128:$src)), 6637 (ROUNDPSr VR128:$src, (i32 0x1))>; 6638 def : Pat<(v4f32 (fnearbyint VR128:$src)), 6639 (ROUNDPSr VR128:$src, (i32 0xC))>; 6640 def : Pat<(v4f32 (fceil VR128:$src)), 6641 (ROUNDPSr VR128:$src, (i32 0x2))>; 6642 def : Pat<(v4f32 (frint VR128:$src)), 6643 (ROUNDPSr VR128:$src, (i32 0x4))>; 6644 def : Pat<(v4f32 (ftrunc VR128:$src)), 6645 (ROUNDPSr VR128:$src, (i32 0x3))>; 6646 6647 def : Pat<(v2f64 (ffloor VR128:$src)), 
            (ROUNDPDr VR128:$src, (i32 0x1))>;
  def : Pat<(v2f64 (fnearbyint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0xC))>;
  def : Pat<(v2f64 (fceil VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x2))>;
  def : Pat<(v2f64 (frint VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x4))>;
  def : Pat<(v2f64 (ftrunc VR128:$src)),
            (ROUNDPDr VR128:$src, (i32 0x3))>;
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//

// The PTEST instruction is selected in X86ISelLowering, primarily when
// lowering the Intel intrinsic that corresponds to it.
let Defs = [EFLAGS], Predicates = [HasAVX] in {
def VPTESTrr  : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX;
def VPTESTrm  : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR128:$src1, (loadv2i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;

def VPTESTYrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR256:$src1, VR256:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS, (X86ptest VR256:$src1, (v4i64 VR256:$src2)))]>,
                Sched<[WriteVecLogic]>, VEX, VEX_L;
def VPTESTYrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR256:$src1, i256mem:$src2),
                "vptest\t{$src2, $src1|$src1, $src2}",
                [(set EFLAGS,(X86ptest VR256:$src1, (loadv4i64 addr:$src2)))]>,
                Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX, VEX_L;
}

let Defs = [EFLAGS] in {
def PTESTrr : SS48I<0x17, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (v2i64 VR128:$src2)))]>,
              Sched<[WriteVecLogic]>;
def PTESTrm : SS48I<0x17, MRMSrcMem, (outs), (ins VR128:$src1, f128mem:$src2),
              "ptest\t{$src2, $src1|$src1, $src2}",
              [(set EFLAGS, (X86ptest VR128:$src1, (memopv2i64 addr:$src2)))]>,
              Sched<[WriteVecLogicLd, ReadAfterLd]>;
}

// The bit test instructions below are AVX-only.
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
            Sched<[WriteVecLogic]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
                VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
                VEX_L;
}
}

//===----------------------------------------------------------------------===//
// SSE4.1 -
multiclass avx_bittest<bits<8> opc, string OpcodeStr, RegisterClass RC,
                       X86MemOperand x86memop, PatFrag mem_frag, ValueType vt> {
  def rr : SS48I<opc, MRMSrcReg, (outs), (ins RC:$src1, RC:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (vt RC:$src2)))]>,
            Sched<[WriteVecLogic]>, VEX;
  def rm : SS48I<opc, MRMSrcMem, (outs), (ins RC:$src1, x86memop:$src2),
            !strconcat(OpcodeStr, "\t{$src2, $src1|$src1, $src2}"),
            [(set EFLAGS, (X86testp RC:$src1, (mem_frag addr:$src2)))]>,
            Sched<[WriteVecLogicLd, ReadAfterLd]>, VEX;
}

let Defs = [EFLAGS], Predicates = [HasAVX] in {
let ExeDomain = SSEPackedSingle in {
defm VTESTPS  : avx_bittest<0x0E, "vtestps", VR128, f128mem, loadv4f32, v4f32>;
defm VTESTPSY : avx_bittest<0x0E, "vtestps", VR256, f256mem, loadv8f32, v8f32>,
                VEX_L;
}
let ExeDomain = SSEPackedDouble in {
defm VTESTPD  : avx_bittest<0x0F, "vtestpd", VR128, f128mem, loadv2f64, v2f64>;
defm VTESTPDY : avx_bittest<0x0F, "vtestpd", VR256, f256mem, loadv4f64, v4f64>,
                VEX_L;
}
}

//===----------------------------------------------------------------------===//
// SSE4.1 - Misc Instructions
//===----------------------------------------------------------------------===//

let Defs = [EFLAGS], Predicates = [HasPOPCNT] in {
  def POPCNT16rr : I<0xB8, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop GR16:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
                     OpSize16, XS;
  def POPCNT16rm : I<0xB8, MRMSrcMem, (outs GR16:$dst), (ins i16mem:$src),
                     "popcnt{w}\t{$src, $dst|$dst, $src}",
                     [(set GR16:$dst, (ctpop (loadi16 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                     Sched<[WriteFAddLd]>, OpSize16, XS;

  def POPCNT32rr : I<0xB8, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop GR32:$src)), (implicit EFLAGS)],
                     IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>,
                     OpSize32, XS;
  def POPCNT32rm : I<0xB8, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
                     "popcnt{l}\t{$src, $dst|$dst, $src}",
                     [(set GR32:$dst, (ctpop (loadi32 addr:$src))),
                      (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                     Sched<[WriteFAddLd]>, OpSize32, XS;

  def POPCNT64rr : RI<0xB8, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop GR64:$src)), (implicit EFLAGS)],
                      IIC_SSE_POPCNT_RR>, Sched<[WriteFAdd]>, XS;
  def POPCNT64rm : RI<0xB8, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
                      "popcnt{q}\t{$src, $dst|$dst, $src}",
                      [(set GR64:$dst, (ctpop (loadi64 addr:$src))),
                       (implicit EFLAGS)], IIC_SSE_POPCNT_RM>,
                      Sched<[WriteFAddLd]>, XS;
}

// SS41I_unop_rm_int_v16 - SSE 4.1 unary operator whose type is v8i16.
multiclass SS41I_unop_rm_int_v16<bits<8> opc, string OpcodeStr,
                                 Intrinsic IntId128, PatFrag ld_frag,
                                 X86FoldableSchedWrite Sched> {
  def rr128 : SS48I<opc, MRMSrcReg, (outs VR128:$dst),
                    (ins VR128:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst, (IntId128 VR128:$src))]>,
                    Sched<[Sched]>;
  def rm128 : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
                    (ins i128mem:$src),
                    !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                    [(set VR128:$dst,
                      (IntId128 (bitconvert (ld_frag addr:$src))))]>,
                    Sched<[Sched.Folded]>;
}

// PHMINPOSUW has the same latency profile as PSAD, so we reuse the same
// scheduling model, even though the WriteVecIMul name is misleading here.
let Predicates = [HasAVX] in
defm VPHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "vphminposuw",
                                          int_x86_sse41_phminposuw, loadv2i64,
                                          WriteVecIMul>, VEX;
defm PHMINPOSUW : SS41I_unop_rm_int_v16 <0x41, "phminposuw",
                                         int_x86_sse41_phminposuw, memopv2i64,
                                         WriteVecIMul>;

/// SS48I_binop_rm - Simple SSE41 binary operator.
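/// Each instantiation produces a register-register form (rr) and a
/// load-folding form (rm); Is2Addr selects between the tied two-address
/// SSE syntax and the three-operand AVX syntax.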
6791multiclass SS48I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 6792 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6793 X86MemOperand x86memop, bit Is2Addr = 1, 6794 OpndItins itins = SSE_INTALU_ITINS_P> { 6795 let isCommutable = 1 in 6796 def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 6797 (ins RC:$src1, RC:$src2), 6798 !if(Is2Addr, 6799 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6800 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6801 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>, 6802 Sched<[itins.Sched]>; 6803 def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 6804 (ins RC:$src1, x86memop:$src2), 6805 !if(Is2Addr, 6806 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6807 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6808 [(set RC:$dst, 6809 (OpVT (OpNode RC:$src1, (bitconvert (memop_frag addr:$src2)))))]>, 6810 Sched<[itins.Sched.Folded, ReadAfterLd]>; 6811} 6812 6813/// SS48I_binop_rm2 - Simple SSE41 binary operator with different src and dst 6814/// types. 6815multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode, 6816 ValueType DstVT, ValueType SrcVT, RegisterClass RC, 6817 PatFrag memop_frag, X86MemOperand x86memop, 6818 OpndItins itins, 6819 bit IsCommutable = 0, bit Is2Addr = 1> { 6820 let isCommutable = IsCommutable in 6821 def rr : SS48I<opc, MRMSrcReg, (outs RC:$dst), 6822 (ins RC:$src1, RC:$src2), 6823 !if(Is2Addr, 6824 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6825 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6826 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), RC:$src2)))]>, 6827 Sched<[itins.Sched]>; 6828 def rm : SS48I<opc, MRMSrcMem, (outs RC:$dst), 6829 (ins RC:$src1, x86memop:$src2), 6830 !if(Is2Addr, 6831 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 6832 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 6833 [(set RC:$dst, (DstVT (OpNode (SrcVT RC:$src1), 6834 (bitconvert (memop_frag addr:$src2)))))]>, 6835 Sched<[itins.Sched.Folded, ReadAfterLd]>; 6836} 6837 6838let Predicates = [HasAVX, NoVLX] in { 6839 let isCommutable = 0 in 6840 defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128, 6841 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6842 VEX_4V; 6843 defm VPMINSD : SS48I_binop_rm<0x39, "vpminsd", X86smin, v4i32, VR128, 6844 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6845 VEX_4V; 6846 defm VPMINUD : SS48I_binop_rm<0x3B, "vpminud", X86umin, v4i32, VR128, 6847 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6848 VEX_4V; 6849 defm VPMINUW : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v8i16, VR128, 6850 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6851 VEX_4V; 6852 defm VPMAXSB : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v16i8, VR128, 6853 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6854 VEX_4V; 6855 defm VPMAXSD : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v4i32, VR128, 6856 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6857 VEX_4V; 6858 defm VPMAXUD : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v4i32, VR128, 6859 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6860 VEX_4V; 6861 defm VPMAXUW : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v8i16, VR128, 6862 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>, 6863 VEX_4V; 6864 defm VPMULDQ : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v2i64, v4i32, 6865 VR128, loadv2i64, i128mem, 6866 SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V; 6867} 6868 6869let Predicates = [HasAVX2, NoVLX] in { 6870 let isCommutable = 0 in 6871 defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, 
VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMINSDY : SS48I_binop_rm<0x39, "vpminsd", X86smin, v8i32, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMINUDY : SS48I_binop_rm<0x3B, "vpminud", X86umin, v8i32, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMINUWY : SS48I_binop_rm<0x3A, "vpminuw", X86umin, v16i16, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMAXSBY : SS48I_binop_rm<0x3C, "vpmaxsb", X86smax, v32i8, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMAXSDY : SS48I_binop_rm<0x3D, "vpmaxsd", X86smax, v8i32, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMAXUDY : SS48I_binop_rm<0x3F, "vpmaxud", X86umax, v8i32, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMAXUWY : SS48I_binop_rm<0x3E, "vpmaxuw", X86umax, v16i16, VR256,
                                 loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V, VEX_L;
  defm VPMULDQY : SS48I_binop_rm2<0x28, "vpmuldq", X86pmuldq, v4i64, v8i32,
                                  VR256, loadv4i64, i256mem,
                                  SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in
  defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUD : SS48I_binop_rm<0x3B, "pminud", X86umin, v4i32, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMINUW : SS48I_binop_rm<0x3A, "pminuw", X86umin, v8i16, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSB : SS48I_binop_rm<0x3C, "pmaxsb", X86smax, v16i8, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXSD : SS48I_binop_rm<0x3D, "pmaxsd", X86smax, v4i32, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUD : SS48I_binop_rm<0x3F, "pmaxud", X86umax, v4i32, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMAXUW : SS48I_binop_rm<0x3E, "pmaxuw", X86umax, v8i16, VR128,
                               memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
  defm PMULDQ : SS48I_binop_rm2<0x28, "pmuldq", X86pmuldq, v2i64, v4i32,
                                VR128, memopv2i64, i128mem,
                                SSE_INTMUL_ITINS_P, 1>;
}

// The VEX-encoded forms do not require an aligned memory operand, so fold
// loads with loadv2i64 rather than the alignment-checking memopv2i64.
let Predicates = [HasAVX, NoVLX] in {
  defm VPMULLD  : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, VR128,
                                 loadv2i64, i128mem, 0, SSE_PMULLD_ITINS>,
                                 VEX_4V;
  defm VPCMPEQQ : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v2i64, VR128,
                                 loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
                                 VEX_4V;
}
let Predicates = [HasAVX2] in {
  defm VPMULLDY  : SS48I_binop_rm<0x40, "vpmulld", mul, v8i32, VR256,
                                  loadv4i64, i256mem, 0, SSE_PMULLD_ITINS>,
                                  VEX_4V, VEX_L;
  defm VPCMPEQQY : SS48I_binop_rm<0x29, "vpcmpeqq", X86pcmpeq, v4i64, VR256,
                                  loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
                                  VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  defm PMULLD  : SS48I_binop_rm<0x40, "pmulld", mul, v4i32, VR128,
                                memopv2i64, i128mem, 1, SSE_PMULLD_ITINS>;
  defm PCMPEQQ : SS48I_binop_rm<0x29, "pcmpeqq", X86pcmpeq, v2i64, VR128,
                                memopv2i64, i128mem, 1, SSE_INTALUQ_ITINS_P>;
}

/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
multiclass SS41I_binop_rmi_int<bits<8> opc, string 
OpcodeStr, 6949 Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, 6950 X86MemOperand x86memop, bit Is2Addr = 1, 6951 OpndItins itins = DEFAULT_ITINS> { 6952 let isCommutable = 1 in 6953 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 6954 (ins RC:$src1, RC:$src2, u8imm:$src3), 6955 !if(Is2Addr, 6956 !strconcat(OpcodeStr, 6957 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6958 !strconcat(OpcodeStr, 6959 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6960 [(set RC:$dst, (IntId RC:$src1, RC:$src2, imm:$src3))], itins.rr>, 6961 Sched<[itins.Sched]>; 6962 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 6963 (ins RC:$src1, x86memop:$src2, u8imm:$src3), 6964 !if(Is2Addr, 6965 !strconcat(OpcodeStr, 6966 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6967 !strconcat(OpcodeStr, 6968 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6969 [(set RC:$dst, 6970 (IntId RC:$src1, 6971 (bitconvert (memop_frag addr:$src2)), imm:$src3))], itins.rm>, 6972 Sched<[itins.Sched.Folded, ReadAfterLd]>; 6973} 6974 6975/// SS41I_binop_rmi - SSE 4.1 binary operator with 8-bit immediate 6976multiclass SS41I_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode, 6977 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 6978 X86MemOperand x86memop, bit Is2Addr = 1, 6979 OpndItins itins = DEFAULT_ITINS> { 6980 let isCommutable = 1 in 6981 def rri : SS4AIi8<opc, MRMSrcReg, (outs RC:$dst), 6982 (ins RC:$src1, RC:$src2, u8imm:$src3), 6983 !if(Is2Addr, 6984 !strconcat(OpcodeStr, 6985 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6986 !strconcat(OpcodeStr, 6987 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6988 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))], 6989 itins.rr>, Sched<[itins.Sched]>; 6990 def rmi : SS4AIi8<opc, MRMSrcMem, (outs RC:$dst), 6991 (ins RC:$src1, x86memop:$src2, u8imm:$src3), 6992 !if(Is2Addr, 6993 !strconcat(OpcodeStr, 6994 "\t{$src3, $src2, $dst|$dst, $src2, $src3}"), 6995 !strconcat(OpcodeStr, 6996 "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), 6997 [(set RC:$dst, 6998 (OpVT (OpNode RC:$src1, 6999 (bitconvert (memop_frag addr:$src2)), imm:$src3)))], itins.rm>, 7000 Sched<[itins.Sched.Folded, ReadAfterLd]>; 7001} 7002 7003let Predicates = [HasAVX] in { 7004 let isCommutable = 0 in { 7005 defm VMPSADBW : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_sse41_mpsadbw, 7006 VR128, loadv2i64, i128mem, 0, 7007 DEFAULT_ITINS_MPSADSCHED>, VEX_4V; 7008 } 7009 7010 let ExeDomain = SSEPackedSingle in { 7011 defm VBLENDPS : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v4f32, 7012 VR128, loadv4f32, f128mem, 0, 7013 DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; 7014 defm VBLENDPSY : SS41I_binop_rmi<0x0C, "vblendps", X86Blendi, v8f32, 7015 VR256, loadv8f32, f256mem, 0, 7016 DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; 7017 } 7018 let ExeDomain = SSEPackedDouble in { 7019 defm VBLENDPD : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v2f64, 7020 VR128, loadv2f64, f128mem, 0, 7021 DEFAULT_ITINS_FBLENDSCHED>, VEX_4V; 7022 defm VBLENDPDY : SS41I_binop_rmi<0x0D, "vblendpd", X86Blendi, v4f64, 7023 VR256, loadv4f64, f256mem, 0, 7024 DEFAULT_ITINS_FBLENDSCHED>, VEX_4V, VEX_L; 7025 } 7026 defm VPBLENDW : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v8i16, 7027 VR128, loadv2i64, i128mem, 0, 7028 DEFAULT_ITINS_BLENDSCHED>, VEX_4V; 7029 7030 let ExeDomain = SSEPackedSingle in 7031 defm VDPPS : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_sse41_dpps, 7032 VR128, loadv4f32, f128mem, 0, 7033 SSE_DPPS_ITINS>, VEX_4V; 7034 let ExeDomain = SSEPackedDouble in 7035 defm VDPPD 
: SS41I_binop_rmi_int<0x41, "vdppd", int_x86_sse41_dppd,
                                      VR128, loadv2f64, f128mem, 0,
                                      SSE_DPPS_ITINS>, VEX_4V;
  let ExeDomain = SSEPackedSingle in
  defm VDPPSY : SS41I_binop_rmi_int<0x40, "vdpps", int_x86_avx_dp_ps_256,
                                    VR256, loadv8f32, f256mem, 0,
                                    SSE_DPPS_ITINS>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
  let isCommutable = 0 in {
  defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw,
                                       VR256, loadv4i64, i256mem, 0,
                                       DEFAULT_ITINS_MPSADSCHED>, VEX_4V, VEX_L;
  }
  defm VPBLENDWY : SS41I_binop_rmi<0x0E, "vpblendw", X86Blendi, v16i16,
                                   VR256, loadv4i64, i256mem, 0,
                                   DEFAULT_ITINS_BLENDSCHED>, VEX_4V, VEX_L;
}

let Constraints = "$src1 = $dst" in {
  let isCommutable = 0 in {
  defm MPSADBW : SS41I_binop_rmi_int<0x42, "mpsadbw", int_x86_sse41_mpsadbw,
                                     VR128, memopv2i64, i128mem,
                                     1, SSE_MPSADBW_ITINS>;
  }
  let ExeDomain = SSEPackedSingle in
  defm BLENDPS : SS41I_binop_rmi<0x0C, "blendps", X86Blendi, v4f32,
                                 VR128, memopv4f32, f128mem,
                                 1, SSE_INTALU_ITINS_FBLEND_P>;
  let ExeDomain = SSEPackedDouble in
  defm BLENDPD : SS41I_binop_rmi<0x0D, "blendpd", X86Blendi, v2f64,
                                 VR128, memopv2f64, f128mem,
                                 1, SSE_INTALU_ITINS_FBLEND_P>;
  defm PBLENDW : SS41I_binop_rmi<0x0E, "pblendw", X86Blendi, v8i16,
                                 VR128, memopv2i64, i128mem,
                                 1, SSE_INTALU_ITINS_BLEND_P>;
  let ExeDomain = SSEPackedSingle in
  defm DPPS : SS41I_binop_rmi_int<0x40, "dpps", int_x86_sse41_dpps,
                                  VR128, memopv4f32, f128mem, 1,
                                  SSE_DPPS_ITINS>;
  let ExeDomain = SSEPackedDouble in
  defm DPPD : SS41I_binop_rmi_int<0x41, "dppd", int_x86_sse41_dppd,
                                  VR128, memopv2f64, f128mem, 1,
                                  SSE_DPPD_ITINS>;
}

/// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operands
multiclass SS41I_quaternary_int_avx<bits<8> opc, string OpcodeStr,
                                    RegisterClass RC, X86MemOperand x86memop,
                                    PatFrag mem_frag, Intrinsic IntId,
                                    X86FoldableSchedWrite Sched> {
  def rr : Ii8<opc, MRMSrcReg, (outs RC:$dst),
               (ins RC:$src1, RC:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set RC:$dst, (IntId RC:$src1, RC:$src2, RC:$src3))],
               NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
               Sched<[Sched]>;

  def rm : Ii8<opc, MRMSrcMem, (outs RC:$dst),
               (ins RC:$src1, x86memop:$src2, RC:$src3),
               !strconcat(OpcodeStr,
                "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
               [(set RC:$dst,
                 (IntId RC:$src1, (bitconvert (mem_frag addr:$src2)),
                  RC:$src3))],
               NoItinerary, SSEPackedInt>, TAPD, VEX_4V, VEX_I8IMM,
               Sched<[Sched.Folded, ReadAfterLd]>;
}

let Predicates = [HasAVX] in {
let ExeDomain = SSEPackedDouble in {
defm VBLENDVPD  : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, f128mem,
                                           loadv2f64, int_x86_sse41_blendvpd,
                                           WriteFVarBlend>;
defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, f256mem,
                                           loadv4f64, int_x86_avx_blendv_pd_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = SSEPackedDouble
let ExeDomain = SSEPackedSingle in {
defm VBLENDVPS  : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, f128mem,
                                           loadv4f32, int_x86_sse41_blendvps,
                                           WriteFVarBlend>;
defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, f256mem,
                                           loadv8f32, int_x86_avx_blendv_ps_256,
                                           WriteFVarBlend>, VEX_L;
} // ExeDomain = 
SSEPackedSingle 7123defm VPBLENDVB : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR128, i128mem, 7124 loadv2i64, int_x86_sse41_pblendvb, 7125 WriteVarBlend>; 7126} 7127 7128let Predicates = [HasAVX2] in { 7129defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, 7130 loadv4i64, int_x86_avx2_pblendvb, 7131 WriteVarBlend>, VEX_L; 7132} 7133 7134let Predicates = [HasAVX] in { 7135 def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1), 7136 (v16i8 VR128:$src2))), 7137 (VPBLENDVBrr VR128:$src2, VR128:$src1, VR128:$mask)>; 7138 def : Pat<(v4i32 (vselect (v4i32 VR128:$mask), (v4i32 VR128:$src1), 7139 (v4i32 VR128:$src2))), 7140 (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; 7141 def : Pat<(v4f32 (vselect (v4i32 VR128:$mask), (v4f32 VR128:$src1), 7142 (v4f32 VR128:$src2))), 7143 (VBLENDVPSrr VR128:$src2, VR128:$src1, VR128:$mask)>; 7144 def : Pat<(v2i64 (vselect (v2i64 VR128:$mask), (v2i64 VR128:$src1), 7145 (v2i64 VR128:$src2))), 7146 (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; 7147 def : Pat<(v2f64 (vselect (v2i64 VR128:$mask), (v2f64 VR128:$src1), 7148 (v2f64 VR128:$src2))), 7149 (VBLENDVPDrr VR128:$src2, VR128:$src1, VR128:$mask)>; 7150 def : Pat<(v8i32 (vselect (v8i32 VR256:$mask), (v8i32 VR256:$src1), 7151 (v8i32 VR256:$src2))), 7152 (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 7153 def : Pat<(v8f32 (vselect (v8i32 VR256:$mask), (v8f32 VR256:$src1), 7154 (v8f32 VR256:$src2))), 7155 (VBLENDVPSYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 7156 def : Pat<(v4i64 (vselect (v4i64 VR256:$mask), (v4i64 VR256:$src1), 7157 (v4i64 VR256:$src2))), 7158 (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 7159 def : Pat<(v4f64 (vselect (v4i64 VR256:$mask), (v4f64 VR256:$src1), 7160 (v4f64 VR256:$src2))), 7161 (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 7162} 7163 7164let Predicates = [HasAVX2] in { 7165 def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), 7166 (v32i8 VR256:$src2))), 7167 (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; 7168} 7169 7170// Patterns 7171// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or 7172// on targets where they have equal performance. These were changed to use 7173// blends because blends have better throughput on SandyBridge and Haswell, but 7174// movs[s/d] are 1-2 byte shorter instructions. 7175let Predicates = [UseAVX] in { 7176 let AddedComplexity = 15 in { 7177 // Move scalar to XMM zero-extended, zeroing a VR128 then do a 7178 // MOVS{S,D} to the lower bits. 7179 def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))), 7180 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)>; 7181 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 7182 (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; 7183 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 7184 (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; 7185 def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))), 7186 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>; 7187 7188 // Move low f32 and clear high bits. 7189 def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))), 7190 (VBLENDPSYrri (v8f32 (AVX_SET0)), VR256:$src, (i8 1))>; 7191 7192 // Move low f64 and clear high bits. 
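// (Blending with immediate 1 takes element 0 from $src and the remaining
// elements from the zero vector, which is what clears the high bits.)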
7193 def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))), 7194 (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>; 7195 } 7196 7197 def : Pat<(v8f32 (X86vzmovl (insert_subvector undef, 7198 (v4f32 (scalar_to_vector FR32:$src)), (iPTR 0)))), 7199 (SUBREG_TO_REG (i32 0), 7200 (v4f32 (VMOVSSrr (v4f32 (V_SET0)), FR32:$src)), 7201 sub_xmm)>; 7202 def : Pat<(v4f64 (X86vzmovl (insert_subvector undef, 7203 (v2f64 (scalar_to_vector FR64:$src)), (iPTR 0)))), 7204 (SUBREG_TO_REG (i64 0), 7205 (v2f64 (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)), 7206 sub_xmm)>; 7207 7208 // These will incur an FP/int domain crossing penalty, but it may be the only 7209 // way without AVX2. Do not add any complexity because we may be able to match 7210 // more optimal patterns defined earlier in this file. 7211 def : Pat<(v8i32 (X86vzmovl (v8i32 VR256:$src))), 7212 (VBLENDPSYrri (v8i32 (AVX_SET0)), VR256:$src, (i8 1))>; 7213 def : Pat<(v4i64 (X86vzmovl (v4i64 VR256:$src))), 7214 (VBLENDPDYrri (v4i64 (AVX_SET0)), VR256:$src, (i8 1))>; 7215} 7216 7217// FIXME: Prefer a movss or movsd over a blendps when optimizing for size or 7218// on targets where they have equal performance. These were changed to use 7219// blends because blends have better throughput on SandyBridge and Haswell, but 7220// movs[s/d] are 1-2 byte shorter instructions. 7221let Predicates = [UseSSE41] in { 7222 // With SSE41 we can use blends for these patterns. 7223 def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))), 7224 (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>; 7225 def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))), 7226 (PBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>; 7227 def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))), 7228 (BLENDPDrri (v2f64 (V_SET0)), VR128:$src, (i8 1))>; 7229} 7230 7231 7232/// SS41I_ternary_int - SSE 4.1 ternary operator 7233let Uses = [XMM0], Constraints = "$src1 = $dst" in { 7234 multiclass SS41I_ternary_int<bits<8> opc, string OpcodeStr, PatFrag mem_frag, 7235 X86MemOperand x86memop, Intrinsic IntId, 7236 OpndItins itins = DEFAULT_ITINS> { 7237 def rr0 : SS48I<opc, MRMSrcReg, (outs VR128:$dst), 7238 (ins VR128:$src1, VR128:$src2), 7239 !strconcat(OpcodeStr, 7240 "\t{$src2, $dst|$dst, $src2}"), 7241 [(set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0))], 7242 itins.rr>, Sched<[itins.Sched]>; 7243 7244 def rm0 : SS48I<opc, MRMSrcMem, (outs VR128:$dst), 7245 (ins VR128:$src1, x86memop:$src2), 7246 !strconcat(OpcodeStr, 7247 "\t{$src2, $dst|$dst, $src2}"), 7248 [(set VR128:$dst, 7249 (IntId VR128:$src1, 7250 (bitconvert (mem_frag addr:$src2)), XMM0))], 7251 itins.rm>, Sched<[itins.Sched.Folded, ReadAfterLd]>; 7252 } 7253} 7254 7255let ExeDomain = SSEPackedDouble in 7256defm BLENDVPD : SS41I_ternary_int<0x15, "blendvpd", memopv2f64, f128mem, 7257 int_x86_sse41_blendvpd, 7258 DEFAULT_ITINS_FBLENDSCHED>; 7259let ExeDomain = SSEPackedSingle in 7260defm BLENDVPS : SS41I_ternary_int<0x14, "blendvps", memopv4f32, f128mem, 7261 int_x86_sse41_blendvps, 7262 DEFAULT_ITINS_FBLENDSCHED>; 7263defm PBLENDVB : SS41I_ternary_int<0x10, "pblendvb", memopv2i64, i128mem, 7264 int_x86_sse41_pblendvb, 7265 DEFAULT_ITINS_VARBLENDSCHED>; 7266 7267// Aliases with the implicit xmm0 argument 7268def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7269 (BLENDVPDrr0 VR128:$dst, VR128:$src2)>; 7270def : InstAlias<"blendvpd\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7271 (BLENDVPDrm0 VR128:$dst, f128mem:$src2)>; 7272def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7273 (BLENDVPSrr0 VR128:$dst, VR128:$src2)>; 
7274def : InstAlias<"blendvps\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7275 (BLENDVPSrm0 VR128:$dst, f128mem:$src2)>; 7276def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7277 (PBLENDVBrr0 VR128:$dst, VR128:$src2)>; 7278def : InstAlias<"pblendvb\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7279 (PBLENDVBrm0 VR128:$dst, i128mem:$src2)>; 7280 7281let Predicates = [UseSSE41] in { 7282 def : Pat<(v16i8 (vselect (v16i8 XMM0), (v16i8 VR128:$src1), 7283 (v16i8 VR128:$src2))), 7284 (PBLENDVBrr0 VR128:$src2, VR128:$src1)>; 7285 def : Pat<(v4i32 (vselect (v4i32 XMM0), (v4i32 VR128:$src1), 7286 (v4i32 VR128:$src2))), 7287 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 7288 def : Pat<(v4f32 (vselect (v4i32 XMM0), (v4f32 VR128:$src1), 7289 (v4f32 VR128:$src2))), 7290 (BLENDVPSrr0 VR128:$src2, VR128:$src1)>; 7291 def : Pat<(v2i64 (vselect (v2i64 XMM0), (v2i64 VR128:$src1), 7292 (v2i64 VR128:$src2))), 7293 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 7294 def : Pat<(v2f64 (vselect (v2i64 XMM0), (v2f64 VR128:$src1), 7295 (v2f64 VR128:$src2))), 7296 (BLENDVPDrr0 VR128:$src2, VR128:$src1)>; 7297} 7298 7299let SchedRW = [WriteLoad] in { 7300let Predicates = [HasAVX] in 7301def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 7302 "vmovntdqa\t{$src, $dst|$dst, $src}", 7303 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, 7304 VEX; 7305let Predicates = [HasAVX2] in 7306def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), 7307 "vmovntdqa\t{$src, $dst|$dst, $src}", 7308 [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, 7309 VEX, VEX_L; 7310def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), 7311 "movntdqa\t{$src, $dst|$dst, $src}", 7312 [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>; 7313} // SchedRW 7314 7315//===----------------------------------------------------------------------===// 7316// SSE4.2 - Compare Instructions 7317//===----------------------------------------------------------------------===// 7318 7319/// SS42I_binop_rm - Simple SSE 4.2 binary operator 7320multiclass SS42I_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, 7321 ValueType OpVT, RegisterClass RC, PatFrag memop_frag, 7322 X86MemOperand x86memop, bit Is2Addr = 1> { 7323 def rr : SS428I<opc, MRMSrcReg, (outs RC:$dst), 7324 (ins RC:$src1, RC:$src2), 7325 !if(Is2Addr, 7326 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7327 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7328 [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2)))]>; 7329 def rm : SS428I<opc, MRMSrcMem, (outs RC:$dst), 7330 (ins RC:$src1, x86memop:$src2), 7331 !if(Is2Addr, 7332 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7333 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7334 [(set RC:$dst, 7335 (OpVT (OpNode RC:$src1, (memop_frag addr:$src2))))]>; 7336} 7337 7338let Predicates = [HasAVX] in 7339 defm VPCMPGTQ : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v2i64, VR128, 7340 loadv2i64, i128mem, 0>, VEX_4V; 7341 7342let Predicates = [HasAVX2] in 7343 defm VPCMPGTQY : SS42I_binop_rm<0x37, "vpcmpgtq", X86pcmpgt, v4i64, VR256, 7344 loadv4i64, i256mem, 0>, VEX_4V, VEX_L; 7345 7346let Constraints = "$src1 = $dst" in 7347 defm PCMPGTQ : SS42I_binop_rm<0x37, "pcmpgtq", X86pcmpgt, v2i64, VR128, 7348 memopv2i64, i128mem>; 7349 7350//===----------------------------------------------------------------------===// 7351// SSE4.2 - String/text Processing Instructions 
7352//===----------------------------------------------------------------------===// 7353 7354// Packed Compare Implicit Length Strings, Return Mask 7355multiclass pseudo_pcmpistrm<string asm, PatFrag ld_frag> { 7356 def REG : PseudoI<(outs VR128:$dst), 7357 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7358 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, VR128:$src2, 7359 imm:$src3))]>; 7360 def MEM : PseudoI<(outs VR128:$dst), 7361 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 7362 [(set VR128:$dst, (int_x86_sse42_pcmpistrm128 VR128:$src1, 7363 (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>; 7364} 7365 7366let Defs = [EFLAGS], usesCustomInserter = 1 in { 7367 defm VPCMPISTRM128 : pseudo_pcmpistrm<"#VPCMPISTRM128", loadv2i64>, 7368 Requires<[HasAVX]>; 7369 defm PCMPISTRM128 : pseudo_pcmpistrm<"#PCMPISTRM128", memopv2i64>, 7370 Requires<[UseSSE42]>; 7371} 7372 7373multiclass pcmpistrm_SS42AI<string asm> { 7374 def rr : SS42AI<0x62, MRMSrcReg, (outs), 7375 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7376 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 7377 []>, Sched<[WritePCmpIStrM]>; 7378 let mayLoad = 1 in 7379 def rm :SS42AI<0x62, MRMSrcMem, (outs), 7380 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 7381 !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"), 7382 []>, Sched<[WritePCmpIStrMLd, ReadAfterLd]>; 7383} 7384 7385let Defs = [XMM0, EFLAGS], hasSideEffects = 0 in { 7386 let Predicates = [HasAVX] in 7387 defm VPCMPISTRM128 : pcmpistrm_SS42AI<"vpcmpistrm">, VEX; 7388 defm PCMPISTRM128 : pcmpistrm_SS42AI<"pcmpistrm"> ; 7389} 7390 7391// Packed Compare Explicit Length Strings, Return Mask 7392multiclass pseudo_pcmpestrm<string asm, PatFrag ld_frag> { 7393 def REG : PseudoI<(outs VR128:$dst), 7394 (ins VR128:$src1, VR128:$src3, u8imm:$src5), 7395 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 7396 VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>; 7397 def MEM : PseudoI<(outs VR128:$dst), 7398 (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 7399 [(set VR128:$dst, (int_x86_sse42_pcmpestrm128 VR128:$src1, EAX, 7400 (bc_v16i8 (ld_frag addr:$src3)), EDX, imm:$src5))]>; 7401} 7402 7403let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { 7404 defm VPCMPESTRM128 : pseudo_pcmpestrm<"#VPCMPESTRM128", loadv2i64>, 7405 Requires<[HasAVX]>; 7406 defm PCMPESTRM128 : pseudo_pcmpestrm<"#PCMPESTRM128", memopv2i64>, 7407 Requires<[UseSSE42]>; 7408} 7409 7410multiclass SS42AI_pcmpestrm<string asm> { 7411 def rr : SS42AI<0x60, MRMSrcReg, (outs), 7412 (ins VR128:$src1, VR128:$src3, u8imm:$src5), 7413 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 7414 []>, Sched<[WritePCmpEStrM]>; 7415 let mayLoad = 1 in 7416 def rm : SS42AI<0x60, MRMSrcMem, (outs), 7417 (ins VR128:$src1, i128mem:$src3, u8imm:$src5), 7418 !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"), 7419 []>, Sched<[WritePCmpEStrMLd, ReadAfterLd]>; 7420} 7421 7422let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in { 7423 let Predicates = [HasAVX] in 7424 defm VPCMPESTRM128 : SS42AI_pcmpestrm<"vpcmpestrm">, VEX; 7425 defm PCMPESTRM128 : SS42AI_pcmpestrm<"pcmpestrm">; 7426} 7427 7428// Packed Compare Implicit Length Strings, Return Index 7429multiclass pseudo_pcmpistri<string asm, PatFrag ld_frag> { 7430 def REG : PseudoI<(outs GR32:$dst), 7431 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7432 [(set GR32:$dst, EFLAGS, 7433 (X86pcmpistri VR128:$src1, VR128:$src2, imm:$src3))]>; 7434 def MEM : PseudoI<(outs GR32:$dst), 7435 (ins VR128:$src1, i128mem:$src2, 
u8imm:$src3),
    [(set GR32:$dst, EFLAGS, (X86pcmpistri VR128:$src1,
      (bc_v16i8 (ld_frag addr:$src2)), imm:$src3))]>;
}

let Defs = [EFLAGS], usesCustomInserter = 1 in {
  defm VPCMPISTRI : pseudo_pcmpistri<"#VPCMPISTRI", loadv2i64>,
                    Requires<[HasAVX]>;
  defm PCMPISTRI : pseudo_pcmpistri<"#PCMPISTRI", memopv2i64>,
                   Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpistri<string asm> {
  def rr : SS42AI<0x63, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src2, u8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x63, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src2, u8imm:$src3),
                  !strconcat(asm, "\t{$src3, $src2, $src1|$src1, $src2, $src3}"),
                  []>, Sched<[WritePCmpIStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPISTRI : SS42AI_pcmpistri<"vpcmpistri">, VEX;
  defm PCMPISTRI : SS42AI_pcmpistri<"pcmpistri">;
}

// Packed Compare Explicit Length Strings, Return Index
multiclass pseudo_pcmpestri<string asm, PatFrag ld_frag> {
  def REG : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, VR128:$src3, u8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, VR128:$src3, EDX, imm:$src5))]>;
  def MEM : PseudoI<(outs GR32:$dst),
                    (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
    [(set GR32:$dst, EFLAGS,
      (X86pcmpestri VR128:$src1, EAX, (bc_v16i8 (ld_frag addr:$src3)), EDX,
       imm:$src5))]>;
}

let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in {
  defm VPCMPESTRI : pseudo_pcmpestri<"#VPCMPESTRI", loadv2i64>,
                    Requires<[HasAVX]>;
  defm PCMPESTRI : pseudo_pcmpestri<"#PCMPESTRI", memopv2i64>,
                   Requires<[UseSSE42]>;
}

multiclass SS42AI_pcmpestri<string asm> {
  def rr : SS42AI<0x61, MRMSrcReg, (outs),
                  (ins VR128:$src1, VR128:$src3, u8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrI]>;
  let mayLoad = 1 in
  def rm : SS42AI<0x61, MRMSrcMem, (outs),
                  (ins VR128:$src1, i128mem:$src3, u8imm:$src5),
                  !strconcat(asm, "\t{$src5, $src3, $src1|$src1, $src3, $src5}"),
                  []>, Sched<[WritePCmpEStrILd, ReadAfterLd]>;
}

let Defs = [ECX, EFLAGS], Uses = [EAX, EDX], hasSideEffects = 0 in {
  let Predicates = [HasAVX] in
  defm VPCMPESTRI : SS42AI_pcmpestri<"vpcmpestri">, VEX;
  defm PCMPESTRI : SS42AI_pcmpestri<"pcmpestri">;
}

//===----------------------------------------------------------------------===//
// SSE4.2 - CRC Instructions
//===----------------------------------------------------------------------===//

// No CRC instructions have AVX equivalents.

// crc intrinsic instructions
// These instructions come only in register and memory forms, whose variants
// differ solely in the widths of r and m.
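// Note that they compute CRC32-C (the Castagnoli polynomial, 0x11EDC6F41),
// not the CRC-32 polynomial used by gzip and zlib.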
7512class SS42I_crc32r<bits<8> opc, string asm, RegisterClass RCOut, 7513 RegisterClass RCIn, SDPatternOperator Int> : 7514 SS42FI<opc, MRMSrcReg, (outs RCOut:$dst), (ins RCOut:$src1, RCIn:$src2), 7515 !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), 7516 [(set RCOut:$dst, (Int RCOut:$src1, RCIn:$src2))], IIC_CRC32_REG>, 7517 Sched<[WriteFAdd]>; 7518 7519class SS42I_crc32m<bits<8> opc, string asm, RegisterClass RCOut, 7520 X86MemOperand x86memop, SDPatternOperator Int> : 7521 SS42FI<opc, MRMSrcMem, (outs RCOut:$dst), (ins RCOut:$src1, x86memop:$src2), 7522 !strconcat(asm, "\t{$src2, $src1|$src1, $src2}"), 7523 [(set RCOut:$dst, (Int RCOut:$src1, (load addr:$src2)))], 7524 IIC_CRC32_MEM>, Sched<[WriteFAddLd, ReadAfterLd]>; 7525 7526let Constraints = "$src1 = $dst" in { 7527 def CRC32r32m8 : SS42I_crc32m<0xF0, "crc32{b}", GR32, i8mem, 7528 int_x86_sse42_crc32_32_8>; 7529 def CRC32r32r8 : SS42I_crc32r<0xF0, "crc32{b}", GR32, GR8, 7530 int_x86_sse42_crc32_32_8>; 7531 def CRC32r32m16 : SS42I_crc32m<0xF1, "crc32{w}", GR32, i16mem, 7532 int_x86_sse42_crc32_32_16>, OpSize16; 7533 def CRC32r32r16 : SS42I_crc32r<0xF1, "crc32{w}", GR32, GR16, 7534 int_x86_sse42_crc32_32_16>, OpSize16; 7535 def CRC32r32m32 : SS42I_crc32m<0xF1, "crc32{l}", GR32, i32mem, 7536 int_x86_sse42_crc32_32_32>, OpSize32; 7537 def CRC32r32r32 : SS42I_crc32r<0xF1, "crc32{l}", GR32, GR32, 7538 int_x86_sse42_crc32_32_32>, OpSize32; 7539 def CRC32r64m64 : SS42I_crc32m<0xF1, "crc32{q}", GR64, i64mem, 7540 int_x86_sse42_crc32_64_64>, REX_W; 7541 def CRC32r64r64 : SS42I_crc32r<0xF1, "crc32{q}", GR64, GR64, 7542 int_x86_sse42_crc32_64_64>, REX_W; 7543 let hasSideEffects = 0 in { 7544 let mayLoad = 1 in 7545 def CRC32r64m8 : SS42I_crc32m<0xF0, "crc32{b}", GR64, i8mem, 7546 null_frag>, REX_W; 7547 def CRC32r64r8 : SS42I_crc32r<0xF0, "crc32{b}", GR64, GR8, 7548 null_frag>, REX_W; 7549 } 7550} 7551 7552//===----------------------------------------------------------------------===// 7553// SHA-NI Instructions 7554//===----------------------------------------------------------------------===// 7555 7556multiclass SHAI_binop<bits<8> Opc, string OpcodeStr, Intrinsic IntId, 7557 bit UsesXMM0 = 0> { 7558 def rr : I<Opc, MRMSrcReg, (outs VR128:$dst), 7559 (ins VR128:$src1, VR128:$src2), 7560 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7561 [!if(UsesXMM0, 7562 (set VR128:$dst, (IntId VR128:$src1, VR128:$src2, XMM0)), 7563 (set VR128:$dst, (IntId VR128:$src1, VR128:$src2)))]>, T8; 7564 7565 def rm : I<Opc, MRMSrcMem, (outs VR128:$dst), 7566 (ins VR128:$src1, i128mem:$src2), 7567 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7568 [!if(UsesXMM0, 7569 (set VR128:$dst, (IntId VR128:$src1, 7570 (bc_v4i32 (memopv2i64 addr:$src2)), XMM0)), 7571 (set VR128:$dst, (IntId VR128:$src1, 7572 (bc_v4i32 (memopv2i64 addr:$src2)))))]>, T8; 7573} 7574 7575let Constraints = "$src1 = $dst", Predicates = [HasSHA] in { 7576 def SHA1RNDS4rri : Ii8<0xCC, MRMSrcReg, (outs VR128:$dst), 7577 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7578 "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7579 [(set VR128:$dst, 7580 (int_x86_sha1rnds4 VR128:$src1, VR128:$src2, 7581 (i8 imm:$src3)))]>, TA; 7582 def SHA1RNDS4rmi : Ii8<0xCC, MRMSrcMem, (outs VR128:$dst), 7583 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 7584 "sha1rnds4\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7585 [(set VR128:$dst, 7586 (int_x86_sha1rnds4 VR128:$src1, 7587 (bc_v4i32 (memopv2i64 addr:$src2)), 7588 (i8 imm:$src3)))]>, TA; 7589 7590 defm SHA1NEXTE : SHAI_binop<0xC8, "sha1nexte", 
int_x86_sha1nexte>; 7591 defm SHA1MSG1 : SHAI_binop<0xC9, "sha1msg1", int_x86_sha1msg1>; 7592 defm SHA1MSG2 : SHAI_binop<0xCA, "sha1msg2", int_x86_sha1msg2>; 7593 7594 let Uses=[XMM0] in 7595 defm SHA256RNDS2 : SHAI_binop<0xCB, "sha256rnds2", int_x86_sha256rnds2, 1>; 7596 7597 defm SHA256MSG1 : SHAI_binop<0xCC, "sha256msg1", int_x86_sha256msg1>; 7598 defm SHA256MSG2 : SHAI_binop<0xCD, "sha256msg2", int_x86_sha256msg2>; 7599} 7600 7601// Aliases with explicit %xmm0 7602def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7603 (SHA256RNDS2rr VR128:$dst, VR128:$src2)>; 7604def : InstAlias<"sha256rnds2\t{%xmm0, $src2, $dst|$dst, $src2, xmm0}", 7605 (SHA256RNDS2rm VR128:$dst, i128mem:$src2)>; 7606 7607//===----------------------------------------------------------------------===// 7608// AES-NI Instructions 7609//===----------------------------------------------------------------------===// 7610 7611multiclass AESI_binop_rm_int<bits<8> opc, string OpcodeStr, Intrinsic IntId128, 7612 PatFrag ld_frag, bit Is2Addr = 1> { 7613 def rr : AES8I<opc, MRMSrcReg, (outs VR128:$dst), 7614 (ins VR128:$src1, VR128:$src2), 7615 !if(Is2Addr, 7616 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7617 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7618 [(set VR128:$dst, (IntId128 VR128:$src1, VR128:$src2))]>, 7619 Sched<[WriteAESDecEnc]>; 7620 def rm : AES8I<opc, MRMSrcMem, (outs VR128:$dst), 7621 (ins VR128:$src1, i128mem:$src2), 7622 !if(Is2Addr, 7623 !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), 7624 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), 7625 [(set VR128:$dst, 7626 (IntId128 VR128:$src1, (ld_frag addr:$src2)))]>, 7627 Sched<[WriteAESDecEncLd, ReadAfterLd]>; 7628} 7629 7630// Perform One Round of an AES Encryption/Decryption Flow 7631let Predicates = [HasAVX, HasAES] in { 7632 defm VAESENC : AESI_binop_rm_int<0xDC, "vaesenc", 7633 int_x86_aesni_aesenc, loadv2i64, 0>, VEX_4V; 7634 defm VAESENCLAST : AESI_binop_rm_int<0xDD, "vaesenclast", 7635 int_x86_aesni_aesenclast, loadv2i64, 0>, VEX_4V; 7636 defm VAESDEC : AESI_binop_rm_int<0xDE, "vaesdec", 7637 int_x86_aesni_aesdec, loadv2i64, 0>, VEX_4V; 7638 defm VAESDECLAST : AESI_binop_rm_int<0xDF, "vaesdeclast", 7639 int_x86_aesni_aesdeclast, loadv2i64, 0>, VEX_4V; 7640} 7641 7642let Constraints = "$src1 = $dst" in { 7643 defm AESENC : AESI_binop_rm_int<0xDC, "aesenc", 7644 int_x86_aesni_aesenc, memopv2i64>; 7645 defm AESENCLAST : AESI_binop_rm_int<0xDD, "aesenclast", 7646 int_x86_aesni_aesenclast, memopv2i64>; 7647 defm AESDEC : AESI_binop_rm_int<0xDE, "aesdec", 7648 int_x86_aesni_aesdec, memopv2i64>; 7649 defm AESDECLAST : AESI_binop_rm_int<0xDF, "aesdeclast", 7650 int_x86_aesni_aesdeclast, memopv2i64>; 7651} 7652 7653// Perform the AES InvMixColumn Transformation 7654let Predicates = [HasAVX, HasAES] in { 7655 def VAESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 7656 (ins VR128:$src1), 7657 "vaesimc\t{$src1, $dst|$dst, $src1}", 7658 [(set VR128:$dst, 7659 (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>, 7660 VEX; 7661 def VAESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 7662 (ins i128mem:$src1), 7663 "vaesimc\t{$src1, $dst|$dst, $src1}", 7664 [(set VR128:$dst, (int_x86_aesni_aesimc (loadv2i64 addr:$src1)))]>, 7665 Sched<[WriteAESIMCLd]>, VEX; 7666} 7667def AESIMCrr : AES8I<0xDB, MRMSrcReg, (outs VR128:$dst), 7668 (ins VR128:$src1), 7669 "aesimc\t{$src1, $dst|$dst, $src1}", 7670 [(set VR128:$dst, 7671 (int_x86_aesni_aesimc VR128:$src1))]>, Sched<[WriteAESIMC]>; 
7672def AESIMCrm : AES8I<0xDB, MRMSrcMem, (outs VR128:$dst), 7673 (ins i128mem:$src1), 7674 "aesimc\t{$src1, $dst|$dst, $src1}", 7675 [(set VR128:$dst, (int_x86_aesni_aesimc (memopv2i64 addr:$src1)))]>, 7676 Sched<[WriteAESIMCLd]>; 7677 7678// AES Round Key Generation Assist 7679let Predicates = [HasAVX, HasAES] in { 7680 def VAESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 7681 (ins VR128:$src1, u8imm:$src2), 7682 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7683 [(set VR128:$dst, 7684 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 7685 Sched<[WriteAESKeyGen]>, VEX; 7686 def VAESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 7687 (ins i128mem:$src1, u8imm:$src2), 7688 "vaeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7689 [(set VR128:$dst, 7690 (int_x86_aesni_aeskeygenassist (loadv2i64 addr:$src1), imm:$src2))]>, 7691 Sched<[WriteAESKeyGenLd]>, VEX; 7692} 7693def AESKEYGENASSIST128rr : AESAI<0xDF, MRMSrcReg, (outs VR128:$dst), 7694 (ins VR128:$src1, u8imm:$src2), 7695 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7696 [(set VR128:$dst, 7697 (int_x86_aesni_aeskeygenassist VR128:$src1, imm:$src2))]>, 7698 Sched<[WriteAESKeyGen]>; 7699def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), 7700 (ins i128mem:$src1, u8imm:$src2), 7701 "aeskeygenassist\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7702 [(set VR128:$dst, 7703 (int_x86_aesni_aeskeygenassist (memopv2i64 addr:$src1), imm:$src2))]>, 7704 Sched<[WriteAESKeyGenLd]>; 7705 7706//===----------------------------------------------------------------------===// 7707// PCLMUL Instructions 7708//===----------------------------------------------------------------------===// 7709 7710// AVX carry-less Multiplication instructions 7711let isCommutable = 1 in 7712def VPCLMULQDQrr : AVXPCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7713 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7714 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7715 [(set VR128:$dst, 7716 (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))]>, 7717 Sched<[WriteCLMul]>; 7718 7719def VPCLMULQDQrm : AVXPCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7720 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 7721 "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7722 [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7723 (loadv2i64 addr:$src2), imm:$src3))]>, 7724 Sched<[WriteCLMulLd, ReadAfterLd]>; 7725 7726// Carry-less Multiplication instructions 7727let Constraints = "$src1 = $dst" in { 7728let isCommutable = 1 in 7729def PCLMULQDQrr : PCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), 7730 (ins VR128:$src1, VR128:$src2, u8imm:$src3), 7731 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7732 [(set VR128:$dst, 7733 (int_x86_pclmulqdq VR128:$src1, VR128:$src2, imm:$src3))], 7734 IIC_SSE_PCLMULQDQ_RR>, Sched<[WriteCLMul]>; 7735 7736def PCLMULQDQrm : PCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), 7737 (ins VR128:$src1, i128mem:$src2, u8imm:$src3), 7738 "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", 7739 [(set VR128:$dst, (int_x86_pclmulqdq VR128:$src1, 7740 (memopv2i64 addr:$src2), imm:$src3))], 7741 IIC_SSE_PCLMULQDQ_RM>, 7742 Sched<[WriteCLMulLd, ReadAfterLd]>; 7743} // Constraints = "$src1 = $dst" 7744 7745 7746multiclass pclmul_alias<string asm, int immop> { 7747 def : InstAlias<!strconcat("pclmul", asm, "dq {$src, $dst|$dst, $src}"), 7748 (PCLMULQDQrr VR128:$dst, VR128:$src, immop), 0>; 7749 7750 def : InstAlias<!strconcat("pclmul", asm, "dq 
{$src, $dst|$dst, $src}"), 7751 (PCLMULQDQrm VR128:$dst, i128mem:$src, immop), 0>; 7752 7753 def : InstAlias<!strconcat("vpclmul", asm, 7754 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7755 (VPCLMULQDQrr VR128:$dst, VR128:$src1, VR128:$src2, immop), 7756 0>; 7757 7758 def : InstAlias<!strconcat("vpclmul", asm, 7759 "dq {$src2, $src1, $dst|$dst, $src1, $src2}"), 7760 (VPCLMULQDQrm VR128:$dst, VR128:$src1, i128mem:$src2, immop), 7761 0>; 7762} 7763defm : pclmul_alias<"hqhq", 0x11>; 7764defm : pclmul_alias<"hqlq", 0x01>; 7765defm : pclmul_alias<"lqhq", 0x10>; 7766defm : pclmul_alias<"lqlq", 0x00>; 7767 7768//===----------------------------------------------------------------------===// 7769// SSE4A Instructions 7770//===----------------------------------------------------------------------===// 7771 7772let Predicates = [HasSSE4A] in { 7773 7774let Constraints = "$src = $dst" in { 7775def EXTRQI : Ii8<0x78, MRMXr, (outs VR128:$dst), 7776 (ins VR128:$src, u8imm:$len, u8imm:$idx), 7777 "extrq\t{$idx, $len, $src|$src, $len, $idx}", 7778 [(set VR128:$dst, (int_x86_sse4a_extrqi VR128:$src, imm:$len, 7779 imm:$idx))]>, PD; 7780def EXTRQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 7781 (ins VR128:$src, VR128:$mask), 7782 "extrq\t{$mask, $src|$src, $mask}", 7783 [(set VR128:$dst, (int_x86_sse4a_extrq VR128:$src, 7784 VR128:$mask))]>, PD; 7785 7786def INSERTQI : Ii8<0x78, MRMSrcReg, (outs VR128:$dst), 7787 (ins VR128:$src, VR128:$src2, u8imm:$len, u8imm:$idx), 7788 "insertq\t{$idx, $len, $src2, $src|$src, $src2, $len, $idx}", 7789 [(set VR128:$dst, (int_x86_sse4a_insertqi VR128:$src, 7790 VR128:$src2, imm:$len, imm:$idx))]>, XD; 7791def INSERTQ : I<0x79, MRMSrcReg, (outs VR128:$dst), 7792 (ins VR128:$src, VR128:$mask), 7793 "insertq\t{$mask, $src|$src, $mask}", 7794 [(set VR128:$dst, (int_x86_sse4a_insertq VR128:$src, 7795 VR128:$mask))]>, XD; 7796} 7797 7798def MOVNTSS : I<0x2B, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src), 7799 "movntss\t{$src, $dst|$dst, $src}", 7800 [(int_x86_sse4a_movnt_ss addr:$dst, VR128:$src)]>, XS; 7801 7802def MOVNTSD : I<0x2B, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src), 7803 "movntsd\t{$src, $dst|$dst, $src}", 7804 [(int_x86_sse4a_movnt_sd addr:$dst, VR128:$src)]>, XD; 7805} 7806 7807//===----------------------------------------------------------------------===// 7808// AVX Instructions 7809//===----------------------------------------------------------------------===// 7810 7811//===----------------------------------------------------------------------===// 7812// VBROADCAST - Load from memory and broadcast to all elements of the 7813// destination operand 7814// 7815class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC, 7816 X86MemOperand x86memop, Intrinsic Int, SchedWrite Sched> : 7817 AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 7818 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7819 [(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX; 7820 7821class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC, 7822 X86MemOperand x86memop, ValueType VT, 7823 PatFrag ld_frag, SchedWrite Sched> : 7824 AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 7825 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7826 [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>, 7827 Sched<[Sched]>, VEX { 7828 let mayLoad = 1; 7829} 7830 7831// AVX2 adds register forms 7832class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, 7833 Intrinsic Int, SchedWrite Sched> : 7834 AVX28I<opc, MRMSrcReg, 
(outs RC:$dst), (ins VR128:$src), 7835 !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), 7836 [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX; 7837 7838let ExeDomain = SSEPackedSingle in { 7839 def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128, 7840 f32mem, v4f32, loadf32, WriteLoad>; 7841 def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256, 7842 f32mem, v8f32, loadf32, 7843 WriteFShuffleLd>, VEX_L; 7844} 7845let ExeDomain = SSEPackedDouble in 7846def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem, 7847 v4f64, loadf64, WriteFShuffleLd>, VEX_L; 7848def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, 7849 int_x86_avx_vbroadcastf128_pd_256, 7850 WriteFShuffleLd>, VEX_L; 7851 7852let ExeDomain = SSEPackedSingle in { 7853 def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, 7854 int_x86_avx2_vbroadcast_ss_ps, 7855 WriteFShuffle>; 7856 def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, 7857 int_x86_avx2_vbroadcast_ss_ps_256, 7858 WriteFShuffle256>, VEX_L; 7859} 7860let ExeDomain = SSEPackedDouble in 7861def VBROADCASTSDYrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, 7862 int_x86_avx2_vbroadcast_sd_pd_256, 7863 WriteFShuffle256>, VEX_L; 7864 7865let Predicates = [HasAVX2] in 7866def VBROADCASTI128 : avx_broadcast_no_int<0x5A, "vbroadcasti128", VR256, 7867 i128mem, v4i64, loadv2i64, 7868 WriteLoad>, VEX_L; 7869 7870let Predicates = [HasAVX] in 7871def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), 7872 (VBROADCASTF128 addr:$src)>; 7873 7874 7875//===----------------------------------------------------------------------===// 7876// VINSERTF128 - Insert packed floating-point values 7877// 7878let hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 7879def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), 7880 (ins VR256:$src1, VR128:$src2, u8imm:$src3), 7881 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7882 []>, Sched<[WriteFShuffle]>, VEX_4V, VEX_L; 7883let mayLoad = 1 in 7884def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), 7885 (ins VR256:$src1, f128mem:$src2, u8imm:$src3), 7886 "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 7887 []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; 7888} 7889 7890let Predicates = [HasAVX] in { 7891def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), 7892 (iPTR imm)), 7893 (VINSERTF128rr VR256:$src1, VR128:$src2, 7894 (INSERT_get_vinsert128_imm VR256:$ins))>; 7895def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (v2f64 VR128:$src2), 7896 (iPTR imm)), 7897 (VINSERTF128rr VR256:$src1, VR128:$src2, 7898 (INSERT_get_vinsert128_imm VR256:$ins))>; 7899 7900def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (loadv4f32 addr:$src2), 7901 (iPTR imm)), 7902 (VINSERTF128rm VR256:$src1, addr:$src2, 7903 (INSERT_get_vinsert128_imm VR256:$ins))>; 7904def : Pat<(vinsert128_insert:$ins (v4f64 VR256:$src1), (loadv2f64 addr:$src2), 7905 (iPTR imm)), 7906 (VINSERTF128rm VR256:$src1, addr:$src2, 7907 (INSERT_get_vinsert128_imm VR256:$ins))>; 7908} 7909 7910let Predicates = [HasAVX1Only] in { 7911def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), 7912 (iPTR imm)), 7913 (VINSERTF128rr VR256:$src1, VR128:$src2, 7914 (INSERT_get_vinsert128_imm VR256:$ins))>; 7915def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2), 7916 (iPTR imm)), 7917 (VINSERTF128rr VR256:$src1, VR128:$src2, 7918 
(INSERT_get_vinsert128_imm VR256:$ins))>; 7919def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2), 7920 (iPTR imm)), 7921 (VINSERTF128rr VR256:$src1, VR128:$src2, 7922 (INSERT_get_vinsert128_imm VR256:$ins))>; 7923def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), 7924 (iPTR imm)), 7925 (VINSERTF128rr VR256:$src1, VR128:$src2, 7926 (INSERT_get_vinsert128_imm VR256:$ins))>; 7927 7928def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2), 7929 (iPTR imm)), 7930 (VINSERTF128rm VR256:$src1, addr:$src2, 7931 (INSERT_get_vinsert128_imm VR256:$ins))>; 7932def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), 7933 (bc_v4i32 (loadv2i64 addr:$src2)), 7934 (iPTR imm)), 7935 (VINSERTF128rm VR256:$src1, addr:$src2, 7936 (INSERT_get_vinsert128_imm VR256:$ins))>; 7937def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), 7938 (bc_v16i8 (loadv2i64 addr:$src2)), 7939 (iPTR imm)), 7940 (VINSERTF128rm VR256:$src1, addr:$src2, 7941 (INSERT_get_vinsert128_imm VR256:$ins))>; 7942def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), 7943 (bc_v8i16 (loadv2i64 addr:$src2)), 7944 (iPTR imm)), 7945 (VINSERTF128rm VR256:$src1, addr:$src2, 7946 (INSERT_get_vinsert128_imm VR256:$ins))>; 7947} 7948 7949//===----------------------------------------------------------------------===// 7950// VEXTRACTF128 - Extract packed floating-point values 7951// 7952let hasSideEffects = 0, ExeDomain = SSEPackedSingle in { 7953def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), 7954 (ins VR256:$src1, u8imm:$src2), 7955 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7956 []>, Sched<[WriteFShuffle]>, VEX, VEX_L; 7957let mayStore = 1 in 7958def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), 7959 (ins f128mem:$dst, VR256:$src1, u8imm:$src2), 7960 "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", 7961 []>, Sched<[WriteStore]>, VEX, VEX_L; 7962} 7963 7964// AVX1 patterns 7965let Predicates = [HasAVX] in { 7966def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7967 (v4f32 (VEXTRACTF128rr 7968 (v8f32 VR256:$src1), 7969 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7970def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7971 (v2f64 (VEXTRACTF128rr 7972 (v4f64 VR256:$src1), 7973 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7974 7975def : Pat<(store (v4f32 (vextract128_extract:$ext (v8f32 VR256:$src1), 7976 (iPTR imm))), addr:$dst), 7977 (VEXTRACTF128mr addr:$dst, VR256:$src1, 7978 (EXTRACT_get_vextract128_imm VR128:$ext))>; 7979def : Pat<(store (v2f64 (vextract128_extract:$ext (v4f64 VR256:$src1), 7980 (iPTR imm))), addr:$dst), 7981 (VEXTRACTF128mr addr:$dst, VR256:$src1, 7982 (EXTRACT_get_vextract128_imm VR128:$ext))>; 7983} 7984 7985let Predicates = [HasAVX1Only] in { 7986def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7987 (v2i64 (VEXTRACTF128rr 7988 (v4i64 VR256:$src1), 7989 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7990def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7991 (v4i32 (VEXTRACTF128rr 7992 (v8i32 VR256:$src1), 7993 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7994def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7995 (v8i16 (VEXTRACTF128rr 7996 (v16i16 VR256:$src1), 7997 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 7998def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)), 7999 (v16i8 (VEXTRACTF128rr 8000 (v32i8 VR256:$src1), 8001 (EXTRACT_get_vextract128_imm VR128:$ext)))>; 8002 8003def : Pat<(alignedstore (v2i64 (vextract128_extract:$ext (v4i64 
VR256:$src1), 8004 (iPTR imm))), addr:$dst), 8005 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8006 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8007def : Pat<(alignedstore (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1), 8008 (iPTR imm))), addr:$dst), 8009 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8010 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8011def : Pat<(alignedstore (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1), 8012 (iPTR imm))), addr:$dst), 8013 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8014 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8015def : Pat<(alignedstore (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1), 8016 (iPTR imm))), addr:$dst), 8017 (VEXTRACTF128mr addr:$dst, VR256:$src1, 8018 (EXTRACT_get_vextract128_imm VR128:$ext))>; 8019} 8020 8021//===----------------------------------------------------------------------===// 8022// VMASKMOV - Conditional SIMD Packed Loads and Stores 8023// 8024multiclass avx_movmask_rm<bits<8> opc_rm, bits<8> opc_mr, string OpcodeStr, 8025 Intrinsic IntLd, Intrinsic IntLd256, 8026 Intrinsic IntSt, Intrinsic IntSt256> { 8027 def rm : AVX8I<opc_rm, MRMSrcMem, (outs VR128:$dst), 8028 (ins VR128:$src1, f128mem:$src2), 8029 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8030 [(set VR128:$dst, (IntLd addr:$src2, VR128:$src1))]>, 8031 VEX_4V; 8032 def Yrm : AVX8I<opc_rm, MRMSrcMem, (outs VR256:$dst), 8033 (ins VR256:$src1, f256mem:$src2), 8034 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8035 [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, 8036 VEX_4V, VEX_L; 8037 def mr : AVX8I<opc_mr, MRMDestMem, (outs), 8038 (ins f128mem:$dst, VR128:$src1, VR128:$src2), 8039 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8040 [(IntSt addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; 8041 def Ymr : AVX8I<opc_mr, MRMDestMem, (outs), 8042 (ins f256mem:$dst, VR256:$src1, VR256:$src2), 8043 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8044 [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L; 8045} 8046 8047let ExeDomain = SSEPackedSingle in 8048defm VMASKMOVPS : avx_movmask_rm<0x2C, 0x2E, "vmaskmovps", 8049 int_x86_avx_maskload_ps, 8050 int_x86_avx_maskload_ps_256, 8051 int_x86_avx_maskstore_ps, 8052 int_x86_avx_maskstore_ps_256>; 8053let ExeDomain = SSEPackedDouble in 8054defm VMASKMOVPD : avx_movmask_rm<0x2D, 0x2F, "vmaskmovpd", 8055 int_x86_avx_maskload_pd, 8056 int_x86_avx_maskload_pd_256, 8057 int_x86_avx_maskstore_pd, 8058 int_x86_avx_maskstore_pd_256>; 8059 8060//===----------------------------------------------------------------------===// 8061// VPERMIL - Permute Single and Double Floating-Point Values 8062// 8063multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr, 8064 RegisterClass RC, X86MemOperand x86memop_f, 8065 X86MemOperand x86memop_i, PatFrag i_frag, 8066 Intrinsic IntVar, ValueType vt> { 8067 def rr : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst), 8068 (ins RC:$src1, RC:$src2), 8069 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8070 [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V, 8071 Sched<[WriteFShuffle]>; 8072 def rm : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst), 8073 (ins RC:$src1, x86memop_i:$src2), 8074 !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), 8075 [(set RC:$dst, (IntVar RC:$src1, 8076 (bitconvert (i_frag addr:$src2))))]>, VEX_4V, 8077 Sched<[WriteFShuffleLd, ReadAfterLd]>; 8078 8079 def ri : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst), 8080 (ins RC:$src1, 
//===----------------------------------------------------------------------===//
// VPERMIL - Permute Single and Double Floating-Point Values
//
multiclass avx_permil<bits<8> opc_rm, bits<8> opc_rmi, string OpcodeStr,
                      RegisterClass RC, X86MemOperand x86memop_f,
                      X86MemOperand x86memop_i, PatFrag i_frag,
                      Intrinsic IntVar, ValueType vt> {
  def rr  : AVX8I<opc_rm, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, RC:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1, RC:$src2))]>, VEX_4V,
             Sched<[WriteFShuffle]>;
  def rm  : AVX8I<opc_rm, MRMSrcMem, (outs RC:$dst),
             (ins RC:$src1, x86memop_i:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (IntVar RC:$src1,
                             (bitconvert (i_frag addr:$src2))))]>, VEX_4V,
             Sched<[WriteFShuffleLd, ReadAfterLd]>;

  def ri  : AVXAIi8<opc_rmi, MRMSrcReg, (outs RC:$dst),
             (ins RC:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst, (vt (X86VPermilpi RC:$src1, (i8 imm:$src2))))]>, VEX,
             Sched<[WriteFShuffle]>;
  def mi  : AVXAIi8<opc_rmi, MRMSrcMem, (outs RC:$dst),
             (ins x86memop_f:$src1, u8imm:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set RC:$dst,
               (vt (X86VPermilpi (load addr:$src1), (i8 imm:$src2))))]>, VEX,
             Sched<[WriteFShuffleLd]>;
}

let ExeDomain = SSEPackedSingle in {
  defm VPERMILPS  : avx_permil<0x0C, 0x04, "vpermilps", VR128, f128mem, i128mem,
                               loadv2i64, int_x86_avx_vpermilvar_ps, v4f32>;
  defm VPERMILPSY : avx_permil<0x0C, 0x04, "vpermilps", VR256, f256mem, i256mem,
                               loadv4i64, int_x86_avx_vpermilvar_ps_256, v8f32>, VEX_L;
}
let ExeDomain = SSEPackedDouble in {
  defm VPERMILPD  : avx_permil<0x0D, 0x05, "vpermilpd", VR128, f128mem, i128mem,
                               loadv2i64, int_x86_avx_vpermilvar_pd, v2f64>;
  defm VPERMILPDY : avx_permil<0x0D, 0x05, "vpermilpd", VR256, f256mem, i256mem,
                               loadv4i64, int_x86_avx_vpermilvar_pd_256, v4f64>, VEX_L;
}

let Predicates = [HasAVX] in {
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (v8i32 VR256:$src2))),
          (VPERMILPSYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v8f32 (X86VPermilpv VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)))),
          (VPERMILPSYrm VR256:$src1, addr:$src2)>;
def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (v4i64 VR256:$src2))),
          (VPERMILPDYrr VR256:$src1, VR256:$src2)>;
def : Pat<(v4f64 (X86VPermilpv VR256:$src1, (loadv4i64 addr:$src2))),
          (VPERMILPDYrm VR256:$src1, addr:$src2)>;

def : Pat<(v8i32 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
          (VPERMILPSYri VR256:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpi VR256:$src1, (i8 imm:$imm))),
          (VPERMILPDYri VR256:$src1, imm:$imm)>;
def : Pat<(v8i32 (X86VPermilpi (bc_v8i32 (loadv4i64 addr:$src1)),
                  (i8 imm:$imm))),
          (VPERMILPSYmi addr:$src1, imm:$imm)>;
def : Pat<(v4i64 (X86VPermilpi (loadv4i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDYmi addr:$src1, imm:$imm)>;

def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (v4i32 VR128:$src2))),
          (VPERMILPSrr VR128:$src1, VR128:$src2)>;
def : Pat<(v4f32 (X86VPermilpv VR128:$src1, (bc_v4i32 (loadv2i64 addr:$src2)))),
          (VPERMILPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (v2i64 VR128:$src2))),
          (VPERMILPDrr VR128:$src1, VR128:$src2)>;
def : Pat<(v2f64 (X86VPermilpv VR128:$src1, (loadv2i64 addr:$src2))),
          (VPERMILPDrm VR128:$src1, addr:$src2)>;

def : Pat<(v2i64 (X86VPermilpi VR128:$src1, (i8 imm:$imm))),
          (VPERMILPDri VR128:$src1, imm:$imm)>;
def : Pat<(v2i64 (X86VPermilpi (loadv2i64 addr:$src1), (i8 imm:$imm))),
          (VPERMILPDmi addr:$src1, imm:$imm)>;
}
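// For reference: vpermilps/vpermilpd shuffle elements within each 128-bit
// lane only; the immediate form uses shufps-style 2-bit selectors per
// element. Usage sketch in intrinsics:
//
//   // reverse each 128-bit half of a __m256 independently:
//   __m256 r = _mm256_permute_ps(v, _MM_SHUFFLE(0, 1, 2, 3));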
"vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", 8153 [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv8f32 addr:$src2), 8154 (i8 imm:$src3)))]>, VEX_4V, VEX_L, 8155 Sched<[WriteFShuffleLd, ReadAfterLd]>; 8156} 8157 8158let Predicates = [HasAVX] in { 8159def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8160 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8161def : Pat<(v4f64 (X86VPerm2x128 VR256:$src1, 8162 (loadv4f64 addr:$src2), (i8 imm:$imm))), 8163 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8164} 8165 8166let Predicates = [HasAVX1Only] in { 8167def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8168 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8169def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8170 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8171def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8172 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8173def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))), 8174 (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>; 8175 8176def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, 8177 (bc_v8i32 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8178 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8179def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, 8180 (loadv4i64 addr:$src2), (i8 imm:$imm))), 8181 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8182def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, 8183 (bc_v32i8 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8184 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8185def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, 8186 (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))), 8187 (VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>; 8188} 8189 8190//===----------------------------------------------------------------------===// 8191// VZERO - Zero YMM registers 8192// 8193let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, 8194 YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in { 8195 // Zero All YMM registers 8196 def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall", 8197 [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>; 8198 8199 // Zero Upper bits of YMM registers 8200 def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper", 8201 [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>; 8202} 8203 8204//===----------------------------------------------------------------------===// 8205// Half precision conversion instructions 8206//===----------------------------------------------------------------------===// 8207multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 8208 def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), 8209 "vcvtph2ps\t{$src, $dst|$dst, $src}", 8210 [(set RC:$dst, (Int VR128:$src))]>, 8211 T8PD, VEX, Sched<[WriteCvtF2F]>; 8212 let hasSideEffects = 0, mayLoad = 1 in 8213 def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), 8214 "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX, 8215 Sched<[WriteCvtF2FLd]>; 8216} 8217 8218multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> { 8219 def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), 8220 (ins RC:$src1, i32u8imm:$src2), 8221 "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 8222 [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, 8223 TAPD, VEX, Sched<[WriteCvtF2F]>; 8224 let hasSideEffects = 0, mayStore = 1, 8225 SchedRW = [WriteCvtF2FLd, WriteRMW] in 8226 def mr : 
//===----------------------------------------------------------------------===//
// VZERO - Zero YMM registers
//
let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
            YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15] in {
  // Zero All YMM registers
  def VZEROALL : I<0x77, RawFrm, (outs), (ins), "vzeroall",
                   [(int_x86_avx_vzeroall)]>, PS, VEX, VEX_L, Requires<[HasAVX]>;

  // Zero Upper bits of YMM registers
  def VZEROUPPER : I<0x77, RawFrm, (outs), (ins), "vzeroupper",
                     [(int_x86_avx_vzeroupper)]>, PS, VEX, Requires<[HasAVX]>;
}
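// Background note: vzeroupper clears only the upper 128 bits of each YMM
// register; it is commonly emitted before calls into legacy-SSE code to
// avoid AVX/SSE transition penalties. Intrinsics equivalent:
//
//   _mm256_zeroupper();   // assembles to vzeroupper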
//===----------------------------------------------------------------------===//
// Half precision conversion instructions
//===----------------------------------------------------------------------===//
multiclass f16c_ph2ps<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}",
             [(set RC:$dst, (Int VR128:$src))]>,
             T8PD, VEX, Sched<[WriteCvtF2F]>;
  let hasSideEffects = 0, mayLoad = 1 in
  def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
             "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8PD, VEX,
             Sched<[WriteCvtF2FLd]>;
}

multiclass f16c_ps2ph<RegisterClass RC, X86MemOperand x86memop, Intrinsic Int> {
  def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst),
               (ins RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}",
               [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>,
               TAPD, VEX, Sched<[WriteCvtF2F]>;
  let hasSideEffects = 0, mayStore = 1,
      SchedRW = [WriteCvtF2FLd, WriteRMW] in
  def mr : Ii8<0x1D, MRMDestMem, (outs),
               (ins x86memop:$dst, RC:$src1, i32u8imm:$src2),
               "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
               TAPD, VEX;
}

let Predicates = [HasF16C] in {
  defm VCVTPH2PS  : f16c_ph2ps<VR128, f64mem, int_x86_vcvtph2ps_128>;
  defm VCVTPH2PSY : f16c_ph2ps<VR256, f128mem, int_x86_vcvtph2ps_256>, VEX_L;
  defm VCVTPS2PH  : f16c_ps2ph<VR128, f64mem, int_x86_vcvtps2ph_128>;
  defm VCVTPS2PHY : f16c_ps2ph<VR256, f128mem, int_x86_vcvtps2ph_256>, VEX_L;

  // Pattern match vcvtph2ps of a scalar i64 load.
  def : Pat<(int_x86_vcvtph2ps_128 (vzmovl_v2i64 addr:$src)),
            (VCVTPH2PSrm addr:$src)>;
  def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
            (VCVTPH2PSrm addr:$src)>;
}

// Patterns for matching conversions from float to half-float and vice versa.
let Predicates = [HasF16C] in {
  def : Pat<(fp_to_f16 FR32:$src),
            (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr
              (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>;

  def : Pat<(f16_to_fp GR16:$src),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
              (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32))>;

  def : Pat<(f16_to_fp (i16 (fp_to_f16 FR32:$src))),
            (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr
              (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32))>;
}
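// Usage sketch (informational): the immediate on vcvtps2ph is a rounding
// control; the 0 used in the patterns above selects round-to-nearest-even.
// In intrinsics terms:
//
//   __m128i h = _mm_cvtps_ph(f, _MM_FROUND_TO_NEAREST_INT);  // vcvtps2ph
//   __m128  g = _mm_cvtph_ps(h);                             // vcvtph2ps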
//===----------------------------------------------------------------------===//
// AVX2 Instructions
//===----------------------------------------------------------------------===//

/// AVX2_binop_rmi - AVX2 binary operator with 8-bit immediate
multiclass AVX2_binop_rmi<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType OpVT, RegisterClass RC, PatFrag memop_frag,
                          X86MemOperand x86memop> {
  let isCommutable = 1 in
  def rri : AVX2AIi8<opc, MRMSrcReg, (outs RC:$dst),
        (ins RC:$src1, RC:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst, (OpVT (OpNode RC:$src1, RC:$src2, imm:$src3)))]>,
        Sched<[WriteBlend]>, VEX_4V;
  def rmi : AVX2AIi8<opc, MRMSrcMem, (outs RC:$dst),
        (ins RC:$src1, x86memop:$src2, u8imm:$src3),
        !strconcat(OpcodeStr,
            "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"),
        [(set RC:$dst,
          (OpVT (OpNode RC:$src1,
                 (bitconvert (memop_frag addr:$src2)), imm:$src3)))]>,
        Sched<[WriteBlendLd, ReadAfterLd]>, VEX_4V;
}

defm VPBLENDD  : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v4i32,
                                VR128, loadv2i64, i128mem>;
defm VPBLENDDY : AVX2_binop_rmi<0x02, "vpblendd", X86Blendi, v8i32,
                                VR256, loadv4i64, i256mem>, VEX_L;
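// For reference: vpblendd chooses per-dword between the two sources;
// immediate bit i set takes element i from the second source. Intrinsics
// sketch:
//
//   __m256i r = _mm256_blend_epi32(a, b, 0xF0);  // low four dwords from a,
//                                                // high four dwords from b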
//===----------------------------------------------------------------------===//
// VPBROADCAST - Load from memory and broadcast to all elements of the
// destination operand
//
multiclass avx2_broadcast<bits<8> opc, string OpcodeStr,
                          X86MemOperand x86memop, PatFrag ld_frag,
                          Intrinsic Int128, Intrinsic Int256> {
  def rr : AVX28I<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst, (Int128 VR128:$src))]>,
                  Sched<[WriteShuffle]>, VEX;
  def rm : AVX28I<opc, MRMSrcMem, (outs VR128:$dst), (ins x86memop:$src),
                  !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                  [(set VR128:$dst,
                    (Int128 (scalar_to_vector (ld_frag addr:$src))))]>,
                  Sched<[WriteLoad]>, VEX;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst, (Int256 VR128:$src))]>,
                   Sched<[WriteShuffle256]>, VEX, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst), (ins x86memop:$src),
                   !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
                   [(set VR256:$dst,
                     (Int256 (scalar_to_vector (ld_frag addr:$src))))]>,
                   Sched<[WriteLoad]>, VEX, VEX_L;
}

defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8,
                                   int_x86_avx2_pbroadcastb_128,
                                   int_x86_avx2_pbroadcastb_256>;
defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16,
                                   int_x86_avx2_pbroadcastw_128,
                                   int_x86_avx2_pbroadcastw_256>;
defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32,
                                   int_x86_avx2_pbroadcastd_128,
                                   int_x86_avx2_pbroadcastd_256>;
defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64,
                                   int_x86_avx2_pbroadcastq_128,
                                   int_x86_avx2_pbroadcastq_256>;

let Predicates = [HasAVX2] in {
  def : Pat<(v16i8 (X86VBroadcast (loadi8 addr:$src))),
            (VPBROADCASTBrm addr:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (loadi8 addr:$src))),
            (VPBROADCASTBYrm addr:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWrm addr:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (loadi16 addr:$src))),
            (VPBROADCASTWYrm addr:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDrm addr:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
            (VPBROADCASTDYrm addr:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))),
            (VPBROADCASTQrm addr:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
            (VPBROADCASTQYrm addr:$src)>;

  def : Pat<(v16i8 (X86VBroadcast (v16i8 VR128:$src))),
            (VPBROADCASTBrr VR128:$src)>;
  def : Pat<(v32i8 (X86VBroadcast (v16i8 VR128:$src))),
            (VPBROADCASTBYrr VR128:$src)>;
  def : Pat<(v8i16 (X86VBroadcast (v8i16 VR128:$src))),
            (VPBROADCASTWrr VR128:$src)>;
  def : Pat<(v16i16 (X86VBroadcast (v8i16 VR128:$src))),
            (VPBROADCASTWYrr VR128:$src)>;
  def : Pat<(v4i32 (X86VBroadcast (v4i32 VR128:$src))),
            (VPBROADCASTDrr VR128:$src)>;
  def : Pat<(v8i32 (X86VBroadcast (v4i32 VR128:$src))),
            (VPBROADCASTDYrr VR128:$src)>;
  def : Pat<(v2i64 (X86VBroadcast (v2i64 VR128:$src))),
            (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4i64 (X86VBroadcast (v2i64 VR128:$src))),
            (VPBROADCASTQYrr VR128:$src)>;
  def : Pat<(v4f32 (X86VBroadcast (v4f32 VR128:$src))),
            (VBROADCASTSSrr VR128:$src)>;
  def : Pat<(v8f32 (X86VBroadcast (v4f32 VR128:$src))),
            (VBROADCASTSSYrr VR128:$src)>;
  def : Pat<(v2f64 (X86VBroadcast (v2f64 VR128:$src))),
            (VPBROADCASTQrr VR128:$src)>;
  def : Pat<(v4f64 (X86VBroadcast (v2f64 VR128:$src))),
            (VBROADCASTSDYrr VR128:$src)>;

  // Provide aliases for broadcast from the same register class that
  // automatically do the extract.
  def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))),
            (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))),
            (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))),
            (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))),
            (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))),
            (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src),
                                                    sub_xmm)))>;
  def : Pat<(v4f64 (X86VBroadcast (v4f64 VR256:$src))),
            (VBROADCASTSDYrr (v2f64 (EXTRACT_SUBREG (v4f64 VR256:$src),
                                                    sub_xmm)))>;

  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
    def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS FR32:$src, VR128))>;
    def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS FR64:$src, VR128))>;

    def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
              (VBROADCASTSSYrr (COPY_TO_REGCLASS GR32:$src, VR128))>;
    def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
              (VBROADCASTSDYrr (COPY_TO_REGCLASS GR64:$src, VR128))>;

    def : Pat<(v16i8 (X86VBroadcast GR8:$src)),
              (VPBROADCASTBrr (COPY_TO_REGCLASS
                               (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                               VR128))>;
    def : Pat<(v32i8 (X86VBroadcast GR8:$src)),
              (VPBROADCASTBYrr (COPY_TO_REGCLASS
                                (i32 (SUBREG_TO_REG (i32 0), GR8:$src, sub_8bit)),
                                VR128))>;

    def : Pat<(v8i16 (X86VBroadcast GR16:$src)),
              (VPBROADCASTWrr (COPY_TO_REGCLASS
                               (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                               VR128))>;
    def : Pat<(v16i16 (X86VBroadcast GR16:$src)),
              (VPBROADCASTWYrr (COPY_TO_REGCLASS
                                (i32 (SUBREG_TO_REG (i32 0), GR16:$src, sub_16bit)),
                                VR128))>;

    // The patterns for VPBROADCASTD are not needed because they would match
    // the exact same thing as VBROADCASTSS patterns.

    def : Pat<(v2i64 (X86VBroadcast GR64:$src)),
              (VPBROADCASTQrr (COPY_TO_REGCLASS GR64:$src, VR128))>;
    // The v4i64 pattern is not needed because VBROADCASTSDYrr already matches.
  }
}

// AVX1 broadcast patterns
let Predicates = [HasAVX1Only] in {
def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSYrm addr:$src)>;
def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))),
          (VBROADCASTSDYrm addr:$src)>;
def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))),
          (VBROADCASTSSrm addr:$src)>;
}

let Predicates = [HasAVX] in {
  // Provide fallback in case the load node that is used in the patterns above
  // is used by additional users, which prevents the pattern selection.
  let AddedComplexity = 20 in {
  // 128-bit broadcasts:
  def : Pat<(v4f32 (X86VBroadcast FR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0)>;
  def : Pat<(v8f32 (X86VBroadcast FR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR32:$src, VR128), 0), 1)>;
  def : Pat<(v4f64 (X86VBroadcast FR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS FR64:$src, VR128), 0x44), 1)>;

  def : Pat<(v4i32 (X86VBroadcast GR32:$src)),
            (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0)>;
  def : Pat<(v8i32 (X86VBroadcast GR32:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR32:$src, VR128), 0), 1)>;
  def : Pat<(v4i64 (X86VBroadcast GR64:$src)),
            (VINSERTF128rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), sub_xmm),
              (VPSHUFDri (COPY_TO_REGCLASS GR64:$src, VR128), 0x44), 1)>;
  }

  def : Pat<(v2f64 (X86VBroadcast f64:$src)),
            (VMOVDDUPrr (COPY_TO_REGCLASS FR64:$src, VR128))>;
}
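// Usage sketch (informational): every vpbroadcast form replicates element 0
// of its source into all destination elements, e.g. in intrinsics:
//
//   __m256i r = _mm256_broadcastd_epi32(x);  // vpbroadcastd, register source
//   __m256i s = _mm256_set1_epi32(42);       // typically lowered the same way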
//===----------------------------------------------------------------------===//
// VPERM - Permute instructions
//

multiclass avx2_perm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                     ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
                   (ins VR256:$src1, VR256:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1, VR256:$src2)))]>,
                   Sched<[Sched]>, VEX_4V, VEX_L;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
                   (ins VR256:$src1, i256mem:$src2),
                   !strconcat(OpcodeStr,
                       "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                   [(set VR256:$dst,
                     (OpVT (X86VPermv VR256:$src1,
                            (bitconvert (mem_frag addr:$src2)))))]>,
                   Sched<[Sched.Folded, ReadAfterLd]>, VEX_4V, VEX_L;
}

defm VPERMD : avx2_perm<0x36, "vpermd", loadv4i64, v8i32, WriteShuffle256>;
let ExeDomain = SSEPackedSingle in
defm VPERMPS : avx2_perm<0x16, "vpermps", loadv8f32, v8f32, WriteFShuffle256>;

multiclass avx2_perm_imm<bits<8> opc, string OpcodeStr, PatFrag mem_frag,
                         ValueType OpVT, X86FoldableSchedWrite Sched> {
  def Yri : AVX2AIi8<opc, MRMSrcReg, (outs VR256:$dst),
                     (ins VR256:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi VR256:$src1, (i8 imm:$src2))))]>,
                     Sched<[Sched]>, VEX, VEX_L;
  def Ymi : AVX2AIi8<opc, MRMSrcMem, (outs VR256:$dst),
                     (ins i256mem:$src1, u8imm:$src2),
                     !strconcat(OpcodeStr,
                         "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
                     [(set VR256:$dst,
                       (OpVT (X86VPermi (mem_frag addr:$src1),
                              (i8 imm:$src2))))]>,
                     Sched<[Sched.Folded, ReadAfterLd]>, VEX, VEX_L;
}

defm VPERMQ : avx2_perm_imm<0x00, "vpermq", loadv4i64, v4i64,
                            WriteShuffle256>, VEX_W;
let ExeDomain = SSEPackedDouble in
defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", loadv4f64, v4f64,
                             WriteFShuffle256>, VEX_W;
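// For reference: unlike vpermilps, these permutes cross 128-bit lanes;
// vpermd takes per-element dword indices in a register, while vpermq packs
// four 2-bit qword selectors into its immediate. Intrinsics sketch:
//
//   __m256i r = _mm256_permutevar8x32_epi32(v, idx);  // vpermd
//   __m256i s = _mm256_permute4x64_epi64(v, 0x1B);    // vpermq: reverse qwords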
//===----------------------------------------------------------------------===//
// VPERM2I128 - Permute Integer Values in 128-bit chunks
//
def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR256:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2,
                              (i8 imm:$src3))))]>, Sched<[WriteShuffle256]>,
          VEX_4V, VEX_L;
def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, f256mem:$src2, u8imm:$src3),
          "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          [(set VR256:$dst, (X86VPerm2x128 VR256:$src1, (loadv4i64 addr:$src2),
                              (i8 imm:$src3)))]>,
          Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;

let Predicates = [HasAVX2] in {
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
          (VPERM2I128rr VR256:$src1, VR256:$src2, imm:$imm)>;

def : Pat<(v32i8 (X86VPerm2x128 VR256:$src1, (bc_v32i8 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v16i16 (X86VPerm2x128 VR256:$src1,
                   (bc_v16i16 (loadv4i64 addr:$src2)), (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
def : Pat<(v8i32 (X86VPerm2x128 VR256:$src1, (bc_v8i32 (loadv4i64 addr:$src2)),
                  (i8 imm:$imm))),
          (VPERM2I128rm VR256:$src1, addr:$src2, imm:$imm)>;
}

//===----------------------------------------------------------------------===//
// VINSERTI128 - Insert packed integer values
//
let hasSideEffects = 0 in {
def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst),
          (ins VR256:$src1, VR128:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256]>, VEX_4V, VEX_L;
let mayLoad = 1 in
def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst),
          (ins VR256:$src1, i128mem:$src2, u8imm:$src3),
          "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
          []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
}

let Predicates = [HasAVX2] in {
def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1), (v4i32 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1), (v16i8 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2),
                                  (iPTR imm)),
          (VINSERTI128rr VR256:$src1, VR128:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;

def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (loadv2i64 addr:$src2),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v8i32 VR256:$src1),
                                  (bc_v4i32 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v32i8 VR256:$src1),
                                  (bc_v16i8 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
def : Pat<(vinsert128_insert:$ins (v16i16 VR256:$src1),
                                  (bc_v8i16 (loadv2i64 addr:$src2)),
                                  (iPTR imm)),
          (VINSERTI128rm VR256:$src1, addr:$src2,
                         (INSERT_get_vinsert128_imm VR256:$ins))>;
}

//===----------------------------------------------------------------------===//
// VEXTRACTI128 - Extract packed integer values
//
def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst),
          (ins VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteShuffle256]>, VEX, VEX_L;
let hasSideEffects = 0, mayStore = 1 in
def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
          (ins i128mem:$dst, VR256:$src1, u8imm:$src2),
          "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
          Sched<[WriteStore]>, VEX, VEX_L;

let Predicates = [HasAVX2] in {
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v2i64 (VEXTRACTI128rr
                    (v4i64 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v4i32 (VEXTRACTI128rr
                    (v8i32 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v8i16 (VEXTRACTI128rr
                    (v16i16 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
          (v16i8 (VEXTRACTI128rr
                    (v32i8 VR256:$src1),
                    (EXTRACT_get_vextract128_imm VR128:$ext)))>;

def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v4i32 (vextract128_extract:$ext (v8i32 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v8i16 (vextract128_extract:$ext (v16i16 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
def : Pat<(store (v16i8 (vextract128_extract:$ext (v32i8 VR256:$src1),
                                                  (iPTR imm))), addr:$dst),
          (VEXTRACTI128mr addr:$dst, VR256:$src1,
                          (EXTRACT_get_vextract128_imm VR128:$ext))>;
}
//===----------------------------------------------------------------------===//
// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores
//
multiclass avx2_pmovmask<string OpcodeStr,
                         Intrinsic IntLd128, Intrinsic IntLd256,
                         Intrinsic IntSt128, Intrinsic IntSt256> {
  def rm  : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V;
  def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>,
             VEX_4V, VEX_L;
  def mr  : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i128mem:$dst, VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V;
  def Ymr : AVX28I<0x8e, MRMDestMem, (outs),
             (ins i256mem:$dst, VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V, VEX_L;
}

defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd",
                                int_x86_avx2_maskload_d,
                                int_x86_avx2_maskload_d_256,
                                int_x86_avx2_maskstore_d,
                                int_x86_avx2_maskstore_d_256>;
defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq",
                                int_x86_avx2_maskload_q,
                                int_x86_avx2_maskload_q_256,
                                int_x86_avx2_maskstore_q,
                                int_x86_avx2_maskstore_q_256>, VEX_W;
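// Lowering note for the patterns below (informational): a generic masked_load
// whose pass-through value is neither undef nor zero has no single AVX/AVX2
// instruction, so it is matched as a mask load followed by a variable blend,
// roughly:
//
//   __m256 loaded = _mm256_maskload_ps(p, m);
//   __m256 result = _mm256_blendv_ps(src0, loaded, _mm256_castsi256_ps(m));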
def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src)),
         (VMASKMOVPSYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(masked_store addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src)),
         (VPMASKMOVDYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src)),
         (VMASKMOVPSmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(masked_store addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src)),
         (VPMASKMOVDmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask),
                 (bc_v8f32 (v8i32 immAllZerosV)))),
         (VMASKMOVPSYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8f32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8f32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VMASKMOVPSYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), undef)),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 immAllZerosV))),
         (VPMASKMOVDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v8i32 (masked_load addr:$ptr, (v8i32 VR256:$mask), (v8i32 VR256:$src0))),
         (VBLENDVPSYrr VR256:$src0, (VPMASKMOVDYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask),
                 (bc_v4f32 (v4i32 immAllZerosV)))),
         (VMASKMOVPSrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4f32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4f32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VMASKMOVPSrm VR128:$mask, addr:$ptr),
                      VR128:$mask)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), undef)),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 immAllZerosV))),
         (VPMASKMOVDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v4i32 (masked_load addr:$ptr, (v4i32 VR128:$mask), (v4i32 VR128:$src0))),
         (VBLENDVPSrr VR128:$src0, (VPMASKMOVDrm VR128:$mask, addr:$ptr),
                      VR128:$mask)>;

def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src)),
         (VMASKMOVPDYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(masked_store addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src)),
         (VPMASKMOVQYmr addr:$ptr, VR256:$mask, VR256:$src)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
                 (v4f64 immAllZerosV))),
         (VMASKMOVPDYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4f64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4f64 VR256:$src0))),
         (VBLENDVPDYrr VR256:$src0, (VMASKMOVPDYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), undef)),
         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask),
                 (bc_v4i64 (v8i32 immAllZerosV)))),
         (VPMASKMOVQYrm VR256:$mask, addr:$ptr)>;

def: Pat<(v4i64 (masked_load addr:$ptr, (v4i64 VR256:$mask), (v4i64 VR256:$src0))),
         (VBLENDVPDYrr VR256:$src0, (VPMASKMOVQYrm VR256:$mask, addr:$ptr),
                       VR256:$mask)>;

def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src)),
         (VMASKMOVPDmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(masked_store addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src)),
         (VPMASKMOVQmr addr:$ptr, VR128:$mask, VR128:$src)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
                 (v2f64 immAllZerosV))),
         (VMASKMOVPDrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2f64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2f64 VR128:$src0))),
         (VBLENDVPDrr VR128:$src0, (VMASKMOVPDrm VR128:$mask, addr:$ptr),
                      VR128:$mask)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), undef)),
         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask),
                 (bc_v2i64 (v4i32 immAllZerosV)))),
         (VPMASKMOVQrm VR128:$mask, addr:$ptr)>;

def: Pat<(v2i64 (masked_load addr:$ptr, (v2i64 VR128:$mask), (v2i64 VR128:$src0))),
         (VBLENDVPDrr VR128:$src0, (VPMASKMOVQrm VR128:$mask, addr:$ptr),
                      VR128:$mask)>;

//===----------------------------------------------------------------------===//
// Variable Bit Shifts
//
multiclass avx2_var_shift<bits<8> opc, string OpcodeStr, SDNode OpNode,
                          ValueType vt128, ValueType vt256> {
  def rr  : AVX28I<opc, MRMSrcReg, (outs VR128:$dst),
             (ins VR128:$src1, VR128:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1, (vt128 VR128:$src2))))]>,
             VEX_4V, Sched<[WriteVarVecShift]>;
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst),
             (ins VR128:$src1, i128mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR128:$dst,
               (vt128 (OpNode VR128:$src1,
                       (vt128 (bitconvert (loadv2i64 addr:$src2))))))]>,
             VEX_4V, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
  def Yrr : AVX28I<opc, MRMSrcReg, (outs VR256:$dst),
             (ins VR256:$src1, VR256:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1, (vt256 VR256:$src2))))]>,
             VEX_4V, VEX_L, Sched<[WriteVarVecShift]>;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs VR256:$dst),
             (ins VR256:$src1, i256mem:$src2),
             !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
             [(set VR256:$dst,
               (vt256 (OpNode VR256:$src1,
                       (vt256 (bitconvert (loadv4i64 addr:$src2))))))]>,
             VEX_4V, VEX_L, Sched<[WriteVarVecShiftLd, ReadAfterLd]>;
}

defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", shl, v4i32, v8i32>;
defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", shl, v2i64, v4i64>, VEX_W;
defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", srl, v4i32, v8i32>;
defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
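// Semantics note (informational): each element is shifted by its own count;
// for the logical shifts a count >= the element width produces zero, while
// vpsravd fills the element with the sign bit. Intrinsics sketch:
//
//   __m256i r = _mm256_sllv_epi32(v, counts);  // vpsllvd
//   __m256i s = _mm256_srav_epi32(v, counts);  // vpsravd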
//===----------------------------------------------------------------------===//
// VGATHER - GATHER Operations
multiclass avx2_gather<bits<8> opc, string OpcodeStr, RegisterClass RC256,
                       X86MemOperand memop128, X86MemOperand memop256> {
  def rm  : AVX28I<opc, MRMSrcMem, (outs VR128:$dst, VR128:$mask_wb),
            (ins VR128:$src1, memop128:$src2, VR128:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX_4VOp3;
  def Yrm : AVX28I<opc, MRMSrcMem, (outs RC256:$dst, RC256:$mask_wb),
            (ins RC256:$src1, memop256:$src2, RC256:$mask),
            !strconcat(OpcodeStr,
              "\t{$mask, $src2, $dst|$dst, $src2, $mask}"),
            []>, VEX_4VOp3, VEX_L;
}

let mayLoad = 1, Constraints
  = "@earlyclobber $dst,@earlyclobber $mask_wb, $src1 = $dst, $mask = $mask_wb"
  in {
  defm VPGATHERDQ : avx2_gather<0x90, "vpgatherdq", VR256, vx64mem, vx64mem>, VEX_W;
  defm VPGATHERQQ : avx2_gather<0x91, "vpgatherqq", VR256, vx64mem, vy64mem>, VEX_W;
  defm VPGATHERDD : avx2_gather<0x90, "vpgatherdd", VR256, vx32mem, vy32mem>;
  defm VPGATHERQD : avx2_gather<0x91, "vpgatherqd", VR128, vx32mem, vy32mem>;

  let ExeDomain = SSEPackedDouble in {
    defm VGATHERDPD : avx2_gather<0x92, "vgatherdpd", VR256, vx64mem, vx64mem>, VEX_W;
    defm VGATHERQPD : avx2_gather<0x93, "vgatherqpd", VR256, vx64mem, vy64mem>, VEX_W;
  }

  let ExeDomain = SSEPackedSingle in {
    defm VGATHERDPS : avx2_gather<0x92, "vgatherdps", VR256, vx32mem, vy32mem>;
    defm VGATHERQPS : avx2_gather<0x93, "vgatherqps", VR128, vx32mem, vy32mem>;
  }
}
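// Usage note (informational): gathers merge into the destination under the
// mask and clear mask elements as loads complete, which is why $dst and
// $mask_wb are tied to the sources and marked @earlyclobber above, e.g.:
//
//   // gather eight floats from base[idx[i]], scale 4 bytes:
//   __m256 r = _mm256_i32gather_ps(base, idx, 4);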