//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the Hexagon Vector instructions in TableGen format.
//
//===----------------------------------------------------------------------===//

// Typed pattern leaves used throughout this file. Each one constrains a
// pattern operand to a specific vector type and the register class that
// carries it:
//   - v2i1/v4i1/v8i1 live in predicate registers (PredRegs),
//   - v4i8/v2i16 fit in a single 32-bit register (IntRegs),
//   - v8i8/v4i16/v2i32 occupy a 64-bit register pair (DoubleRegs).
def V2I1: PatLeaf<(v2i1 PredRegs:$R)>;
def V4I1: PatLeaf<(v4i1 PredRegs:$R)>;
def V8I1: PatLeaf<(v8i1 PredRegs:$R)>;
def V4I8: PatLeaf<(v4i8 IntRegs:$R)>;
def V2I16: PatLeaf<(v2i16 IntRegs:$R)>;
def V8I8: PatLeaf<(v8i8 DoubleRegs:$R)>;
def V4I16: PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32: PatLeaf<(v2i32 DoubleRegs:$R)>;


// A bitconvert between two same-width types held in one 32-bit register is
// a no-op: select the source register unchanged, in both directions.
multiclass bitconvert_32<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a IntRegs:$src))),
             (b IntRegs:$src)>;
  def : Pat <(a (bitconvert (b IntRegs:$src))),
             (a IntRegs:$src)>;
}

// Same idea as bitconvert_32, but for 64-bit values in a register pair.
multiclass bitconvert_64<ValueType a, ValueType b> {
  def : Pat <(b (bitconvert (a DoubleRegs:$src))),
             (b DoubleRegs:$src)>;
  def : Pat <(a (bitconvert (b DoubleRegs:$src))),
             (a DoubleRegs:$src)>;
}

// Bit convert vector types.
defm : bitconvert_32<v4i8, i32>;
defm : bitconvert_32<v2i16, i32>;
defm : bitconvert_32<v2i16, v4i8>;

defm : bitconvert_64<v8i8, i64>;
defm : bitconvert_64<v4i16, i64>;
defm : bitconvert_64<v2i32, i64>;
defm : bitconvert_64<v8i8, v4i16>;
defm : bitconvert_64<v8i8, v2i32>;
defm : bitconvert_64<v4i16, v2i32>;


// Vector shift support. Vector shifting in Hexagon is rather different
// from internal representation of LLVM.
// LLVM assumes all shifts (in vector case) will have the form
//   <VT> = SHL/SRA/SRL <VT> by <VT>
// while Hexagon has the following format:
//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
// As a result, special care is needed to guarantee correctness and
// performance.

// Shift each halfword of a v4i16 by the same 4-bit unsigned immediate.
// The immediate operand is encoded into Inst{11-8}.
class vshift_v4i16<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
      [(set (v4i16 DoubleRegs:$dst),
            (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
  bits<4> src2;
  let Inst{11-8} = src2;
}

// Shift each word of a v2i32 by the same 5-bit unsigned immediate.
// The immediate operand is encoded into Inst{12-8}.
class vshift_v2i32<SDNode Op, string Str, bits<3>MajOp, bits<3>MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
      [(set (v2i32 DoubleRegs:$dst),
            (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
  bits<5> src2;
  let Inst{12-8} = src2;
}

// v2i16 add/sub fit in a single 32-bit register; select the single-register
// halfword vector add/sub instructions.
def : Pat<(v2i16 (add (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svaddh IntRegs:$src1, IntRegs:$src2)>;

def : Pat<(v2i16 (sub (v2i16 IntRegs:$src1), (v2i16 IntRegs:$src2))),
          (A2_svsubh IntRegs:$src1, IntRegs:$src2)>;

// Shift-by-immediate instructions: words (u5 amount) ...
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;

// ... and halfwords (u4 amount).
def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;


// Target-specific splat nodes produced during ISel lowering.
def HexagonVSPLATB: SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
def HexagonVSPLATH: SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;

// Replicate the low 8-bits from 32-bits input register into each of the
// four bytes of 32-bits destination register.
def: Pat<(v4i8 (HexagonVSPLATB I32:$Rs)), (S2_vsplatrb I32:$Rs)>;

// Replicate the low 16-bits from 32-bits input register into each of the
// four halfwords of 64-bits destination register.
def: Pat<(v4i16 (HexagonVSPLATH I32:$Rs)), (S2_vsplatrh I32:$Rs)>;


// Select a binary SDNode applied to a typed vector leaf directly onto
// instruction MI, preserving the operand order.
class VArith_pat <InstHexagon MI, SDNode Op, PatFrag Type>
  : Pat <(Op Type:$Rss, Type:$Rtt),
         (MI Type:$Rss, Type:$Rtt)>;

// 64-bit element-wise add/sub for byte, halfword and word lanes.
def: VArith_pat <A2_vaddub, add, V8I8>;
def: VArith_pat <A2_vaddh,  add, V4I16>;
def: VArith_pat <A2_vaddw,  add, V2I32>;
def: VArith_pat <A2_vsubub, sub, V8I8>;
def: VArith_pat <A2_vsubh,  sub, V4I16>;
def: VArith_pat <A2_vsubw,  sub, V2I32>;

// Bitwise ops are lane-agnostic: 32-bit vectors use the scalar register
// forms ...
def: VArith_pat <A2_and,    and, V2I16>;
def: VArith_pat <A2_xor,    xor, V2I16>;
def: VArith_pat <A2_or,     or,  V2I16>;

// ... and 64-bit vectors use the register-pair forms.
def: VArith_pat <A2_andp,   and, V8I8>;
def: VArith_pat <A2_andp,   and, V4I16>;
def: VArith_pat <A2_andp,   and, V2I32>;
def: VArith_pat <A2_orp,    or,  V8I8>;
def: VArith_pat <A2_orp,    or,  V4I16>;
def: VArith_pat <A2_orp,    or,  V2I32>;
def: VArith_pat <A2_xorp,   xor, V8I8>;
def: VArith_pat <A2_xorp,   xor, V4I16>;
def: VArith_pat <A2_xorp,   xor, V2I32>;

// A v2i32 shift whose amount is a 64-bit COMBINE of the same u5 immediate
// in both words is a uniform per-word shift; select the immediate form.
def: Pat<(v2i32 (sra V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (srl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_lsr_i_vw V2I32:$b, imm:$c)>;
def: Pat<(v2i32 (shl V2I32:$b, (i64 (HexagonCOMBINE (i32 u5ImmPred:$c),
                                                    (i32 u5ImmPred:$c))))),
         (S2_asl_i_vw V2I32:$b, imm:$c)>;

// Likewise, a v4i16 shift by a VSPLATH of a u4 immediate is a uniform
// per-halfword shift by that immediate.
def: Pat<(v4i16 (sra V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (srl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_lsr_i_vh V4I16:$b, imm:$c)>;
def: Pat<(v4i16 (shl V4I16:$b, (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$c)))))),
         (S2_asl_i_vh V4I16:$b, imm:$c)>;


// Type profiles for the target shift nodes below: result and first operand
// share the fixed vector type; the shift amount is any integer.
def SDTHexagon_v2i32_v2i32_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>, SDTCisInt<2>]>;
def SDTHexagon_v4i16_v4i16_i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>, SDTCisInt<2>]>;
// Target nodes for vector shifts with a scalar i32 amount (the Hexagon
// form; see the comment at the top of the shift section).
def HexagonVSRAW: SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRAH: SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSRLW: SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRLH: SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSHLW: SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSHLH: SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;

// Shift nodes with an immediate amount (u5 for words, u4 for halfwords)
// select the immediate-form instructions.
def: Pat<(v2i32 (HexagonVSRAW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRAH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSRLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_lsr_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSRLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_lsr_i_vh V4I16:$Rs, imm:$u4)>;
def: Pat<(v2i32 (HexagonVSHLW V2I32:$Rs, u5ImmPred:$u5)),
         (S2_asl_i_vw V2I32:$Rs, imm:$u5)>;
def: Pat<(v4i16 (HexagonVSHLH V4I16:$Rs, u4ImmPred:$u4)),
         (S2_asl_i_vh V4I16:$Rs, imm:$u4)>;

// Vector shift words by register
def S2_asr_r_vw : T_S3op_shiftVect < "vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect < "vlsrw", 0b00, 0b01>;
def S2_asl_r_vw : T_S3op_shiftVect < "vaslw", 0b00, 0b10>;
def S2_lsl_r_vw : T_S3op_shiftVect < "vlslw", 0b00, 0b11>;

// Vector shift halfwords by register
def S2_asr_r_vh : T_S3op_shiftVect < "vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect < "vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect < "vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect < "vlslh", 0b01, 0b11>;

// Select a shift node with a register (i32) amount onto the corresponding
// register-form shift instruction.
class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(Op Value:$Rs, I32:$Rt),
         (MI Value:$Rs, I32:$Rt)>;

def: vshift_rr_pat <S2_asr_r_vw, HexagonVSRAW, V2I32>;
def: vshift_rr_pat <S2_asr_r_vh, HexagonVSRAH, V4I16>;
def: vshift_rr_pat <S2_lsr_r_vw, HexagonVSRLW, V2I32>;
def: vshift_rr_pat <S2_lsr_r_vh, HexagonVSRLH, V4I16>;
def: vshift_rr_pat <S2_asl_r_vw, HexagonVSHLW, V2I32>;
def: vshift_rr_pat <S2_asl_r_vh, HexagonVSHLH, V4I16>;


// Vector-compare type profiles: both inputs share the fixed vector type,
// and the result is a single i1 (scalar reduction of the compare).
def SDTHexagonVecCompare_v8i8 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v8i8>]>;
def SDTHexagonVecCompare_v4i16 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v4i16>]>;
def SDTHexagonVecCompare_v2i32 : SDTypeProfile<1, 2,
  [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>, SDTCisVT<1, v2i32>]>;

// Target vector-compare nodes: EQ / signed GT / unsigned GT, for byte,
// halfword and word lanes.
def HexagonVCMPBEQ:  SDNode<"HexagonISD::VCMPBEQ",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGT:  SDNode<"HexagonISD::VCMPBGT",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGTU: SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;
def HexagonVCMPHEQ:  SDNode<"HexagonISD::VCMPHEQ",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGT:  SDNode<"HexagonISD::VCMPHGT",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGTU: SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;
def HexagonVCMPWEQ:  SDNode<"HexagonISD::VCMPWEQ",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGT:  SDNode<"HexagonISD::VCMPWGT",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGTU: SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;


// Select a target vector-compare node producing an i1 onto compare
// instruction MI.
class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Value>
  : Pat <(i1 (Op Value:$Rs, Value:$Rt)),
         (MI Value:$Rs, Value:$Rt)>;

def: vcmp_i1_pat<A2_vcmpbeq,  HexagonVCMPBEQ,  V8I8>;
def: vcmp_i1_pat<A4_vcmpbgt,  HexagonVCMPBGT,  V8I8>;
def: vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;

def: vcmp_i1_pat<A2_vcmpheq,  HexagonVCMPHEQ,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgt,  HexagonVCMPHGT,  V4I16>;
def: vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;

def: vcmp_i1_pat<A2_vcmpweq,  HexagonVCMPWEQ,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgt,  HexagonVCMPWGT,  V2I32>;
def: vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;


// Select a generic setcc producing a vector-of-i1 (one bit per lane) onto
// compare instruction MI.
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
  : Pat <(OutTy (Op InVal:$Rs, InVal:$Rt)),
         (MI InVal:$Rs, InVal:$Rt)>;

def: vcmp_vi1_pat<A2_vcmpweq,  seteq,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgt,  setgt,  V2I32, v2i1>;
def: vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;

def: vcmp_vi1_pat<A2_vcmpheq,  seteq,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgt,  setgt,  V4I16, v4i1>;
def: vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;


// Hexagon doesn't have a vector multiply with C semantics.
// Instead, generate a pseudo instruction that gets expanded into two
// scalar MPYI instructions.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
def VMULW : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW\"",
      [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;

// Multiply-accumulate form of VMULW; $Rd is tied to the accumulator $Rx.
let isPseudo = 1 in
def VMULW_ACC : PseudoM<(outs DoubleRegs:$Rd),
      (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
      ".error \"Should never try to emit VMULW_ACC\"",
      [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
      "$Rd = $Rx">;

// Adds two v4i8: Hexagon does not have an insn for this one, so we
// use the double add v8i8, and use only the low part of the result.
def: Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;

// Subtract two v4i8: Hexagon does not have an insn for this one, so we
// use the double sub v8i8, and use only the low part of the result.
def: Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
         (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;

//
// No 32 bit vector mux: widen both sides to 64 bits, mux, keep low word.
//
def: Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
def: Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
         (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;

//
// 64-bit vector mux.
//
// 64-bit vselect maps directly onto C2_vmux with the per-lane predicate.
def: Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)),
         (C2_vmux V8I1:$Pu, V8I8:$Rs, V8I8:$Rt)>;
def: Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)),
         (C2_vmux V4I1:$Pu, V4I16:$Rs, V4I16:$Rt)>;
def: Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)),
         (C2_vmux V2I1:$Pu, V2I32:$Rs, V2I32:$Rt)>;

//
// No 32 bit vector compare: widen both operands to 64 bits and use the
// 64-bit compare. Zext64 preserves the lane values, so the comparison of
// the low lanes is unchanged.
//
def: Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
         (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
         (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;

def: Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
def: Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
         (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;


// Select a compare by swapping the operands of the opposite compare:
// note InvMI receives ($Rt, $Rs), i.e. reversed relative to the pattern.
class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Value,
                    ValueType CmpTy>
  : Pat<(CmpTy (CmpOp Value:$Rs, Value:$Rt)),
        (InvMI Value:$Rt, Value:$Rs)>;

// Map from a compare operation to the corresponding instruction with the
// order of operands reversed, e.g. x > y --> cmp.lt(y,x).
// Signed less-than via swapped greater-than:
def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  i1>;
def: InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, v2i1>;

// Unsigned less-than via swapped unsigned greater-than:
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  i1>;
def: InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  v8i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
def: InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
def: InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;

// Map from vcmpne(Rss) -> !vcmpew(Rss).
// rs != rt -> !(rs == rt).
// There is no vector "not equal" instruction, so invert the predicate
// produced by the word-equality compare.
// BUGFIX: this pattern previously selected A2_vcmpbeq, the per-BYTE
// equality compare, for a v2i32 (per-WORD) setne. The byte compare sets
// one predicate bit per byte, which has a different predicate-bit layout
// than the word compare that v2i1 consumers (and the comment above,
// "!vcmpew") expect. Use the word compare A2_vcmpweq instead.
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;


// Truncate: from vector B copy all 'E'ven 'B'yte elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = B[4]; A[3] = B[6];
def: Pat<(v4i8 (trunc V4I16:$Rs)),
         (S2_vtrunehb V4I16:$Rs)>;

// Truncate: from vector B copy all 'O'dd 'B'yte elements:
// A[0] = B[1]; A[1] = B[3]; A[2] = B[5]; A[3] = B[7];
// S2_vtrunohb

// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
// A[0] = B[0]; A[1] = B[2]; A[2] = C[0]; A[3] = C[2];
// S2_vtruneh

// v2i32 -> v2i16: S2_packhl interleaves the halfwords of (HiReg, LoReg);
// the low word of its result holds the low halfword of each input word.
def: Pat<(v2i16 (trunc V2I32:$Rs)),
         (LoReg (S2_packhl (HiReg $Rs), (LoReg $Rs)))>;


// Target nodes for widening byte sign-extensions.
// NOTE(review): HexagonVSXTBW is selected to S2_vsxthw (a halfword->word
// extender, judging by its name), while the node name suggests byte->word.
// This mapping is kept as-is — confirm against HexagonISelLowering before
// changing either side.
def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;

def: Pat<(i64 (HexagonVSXTBH I32:$Rs)), (S2_vsxtbh I32:$Rs)>;
def: Pat<(i64 (HexagonVSXTBW I32:$Rs)), (S2_vsxthw I32:$Rs)>;

// Widening extensions. anyext leaves the high lane bits unspecified, so
// the zero-extending instruction is a valid (and cheap) choice for it.
def: Pat<(v4i16 (zext   V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (zext   V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;
def: Pat<(v4i16 (sext   V4I8:$Rs)),  (S2_vsxtbh V4I8:$Rs)>;
def: Pat<(v2i32 (sext   V2I16:$Rs)), (S2_vsxthw V2I16:$Rs)>;

// Sign extends a v2i8 into a v2i32: sign-extend the low byte of each
// 32-bit half independently, then recombine.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i8)),
         (A2_combinew (A2_sxtb (HiReg $Rs)), (A2_sxtb (LoReg $Rs)))>;

// Sign extends a v2i16 into a v2i32.
def: Pat<(v2i32 (sext_inreg V2I32:$Rs, v2i16)),
         (A2_combinew (A2_sxth (HiReg $Rs)), (A2_sxth (LoReg $Rs)))>;


// Multiplies two v2i16 and returns a v2i32. We are using here the
// saturating multiply, as hexagon does not provide a non saturating
// vector multiply, and saturation does not impact the result that is
// in double precision of the operands.

// Multiplies two v2i16 vectors: as Hexagon does not have a multiply
// with the C semantics for this one, this pattern uses the half word
// multiply vmpyh that takes two v2i16 and returns a v2i32. This is
// then truncated to fit this back into a v2i16 and to simulate the
// wrap around semantics for unsigned in C.
def vmpyh: OutPatFrag<(ops node:$Rs, node:$Rt),
                      (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;

// v2i16 multiply: widen to v2i32 with vmpyh, then keep the even (low)
// halfword of each product word; the high half of the truncate input is a
// zero pair and is discarded by LoReg.
def: Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
         (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
                             (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;

// Multiplies two v4i16 vectors: multiply the high and low halves
// separately and truncate both product pairs back into one v4i16.
def: Pat<(v4i16 (mul V4I16:$Rs, V4I16:$Rt)),
         (S2_vtrunewh (vmpyh (HiReg $Rs), (HiReg $Rt)),
                      (vmpyh (LoReg $Rs), (LoReg $Rt)))>;

// Pre-V5 byte multiply: sign-extend bytes to halfwords (S2_vsxtbh),
// multiply the halfwords, and truncate back.
def VMPYB_no_V5: OutPatFrag<(ops node:$Rs, node:$Rt),
  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
               (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;

// Multiplies two v4i8 vectors.
// The V5 form (M5_vmpybsu) is listed first; the unconditional pattern
// below is the fallback when HasV5T does not hold.
def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
     Requires<[HasV5T]>;

def: Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
         (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;

// Multiplies two v8i8 vectors.
// v8i8 multiply: handle each 32-bit half (4 bytes) separately and
// recombine. V5 form first; pre-V5 fallback below uses VMPYB_no_V5.
def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (M5_vmpybsu (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (M5_vmpybsu (LoReg $Rs), (LoReg $Rt))))>,
     Requires<[HasV5T]>;

def: Pat<(v8i8 (mul V8I8:$Rs, V8I8:$Rt)),
         (A2_combinew (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rs), (HiReg $Rt))),
                      (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rs), (LoReg $Rt))))>;


// Two-source 64-bit shuffle instruction: $a = <mnemonic>($b, $c),
// selected from the target node Op on i64 operands.
class shuffler<SDNode Op, string Str>
  : SInst<(outs DoubleRegs:$a), (ins DoubleRegs:$b, DoubleRegs:$c),
          "$a = " # Str # "($b, $c)",
          [(set (i64 DoubleRegs:$a),
                (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
          "", S_3op_tc_1_SLOT23>;

// Profile for the shuffle nodes: two i64 inputs, one i64 result.
def SDTHexagonBinOp64 : SDTypeProfile<1, 2,
  [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisVT<0, i64>]>;

def HexagonSHUFFEB: SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
def HexagonSHUFFEH: SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
def HexagonSHUFFOB: SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
def HexagonSHUFFOH: SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;

// Select a shuffle node onto its instruction, preserving operand order.
class ShufflePat<InstHexagon MI, SDNode Op>
  : Pat<(i64 (Op DoubleRegs:$src1, DoubleRegs:$src2)),
        (i64 (MI DoubleRegs:$src1, DoubleRegs:$src2))>;

// Shuffles even bytes for i=0..3: A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
def: ShufflePat<S2_shuffeb, HexagonSHUFFEB>;

// Shuffles odd bytes for i=0..3: A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
def: ShufflePat<S2_shuffob, HexagonSHUFFOB>;

// Shuffles even half for i=0,1: A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
def: ShufflePat<S2_shuffeh, HexagonSHUFFEH>;

// Shuffles odd half for i=0,1: A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
def: ShufflePat<S2_shuffoh, HexagonSHUFFOH>;


// Truncated store from v4i16 to v4i8.
// PatFrag matching a truncating store whose memory type is v4i8
// (i.e. a v4i16 value stored as four bytes).
def truncstorev4i8: PatFrag<(ops node:$val, node:$ptr),
                            (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;

// Truncated store from v2i32 to v2i16.
def truncstorev2i16: PatFrag<(ops node:$val, node:$ptr),
                             (truncstore node:$val, node:$ptr),
    [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;

// v2i32 -> v2i16 store: pack the low halfwords via S2_packhl (as in the
// v2i16 trunc pattern above) and store the resulting 32-bit word.
def: Pat<(truncstorev2i16 V2I32:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (LoReg (S2_packhl (HiReg $Rs),
                                                      (LoReg $Rs))))>;

// v4i16 -> v4i8 store: keep the even bytes (the low byte of each
// halfword) and store the resulting 32-bit word.
def: Pat<(truncstorev4i8 V4I16:$Rs, I32:$Rt),
         (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rs))>;


// Zero and sign extended load from v2i8 into v2i16.
def zextloadv2i8: PatFrag<(ops node:$ptr), (zextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

def sextloadv2i8: PatFrag<(ops node:$ptr), (sextload node:$ptr),
    [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

// Load the two bytes as one halfword, widen bytes to halfwords, and keep
// only the low word of the widened pair.
def: Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;

def: Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
         (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;

// v2i8 -> v2i32 loads: as above, then widen the two halfwords to words.
def: Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
         (S2_vzxthw (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;

def: Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
         (S2_vsxthw (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;