1//===- HexagonInstrInfoVector.td - Hexagon Vector Patterns -*- tablegen -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file describes the Hexagon Vector instructions in TableGen format.
11//
12//===----------------------------------------------------------------------===//
13
// Pattern leaves that pair a vector value type with the register class
// used to hold it.

// Vectors of i1 in PredRegs.
def V2I1  : PatLeaf<(v2i1 PredRegs:$R)>;
def V4I1  : PatLeaf<(v4i1 PredRegs:$R)>;
def V8I1  : PatLeaf<(v8i1 PredRegs:$R)>;

// Vectors in IntRegs.
def V4I8  : PatLeaf<(v4i8  IntRegs:$R)>;
def V2I16 : PatLeaf<(v2i16 IntRegs:$R)>;

// Vectors in DoubleRegs.
def V8I8  : PatLeaf<(v8i8  DoubleRegs:$R)>;
def V4I16 : PatLeaf<(v4i16 DoubleRegs:$R)>;
def V2I32 : PatLeaf<(v2i32 DoubleRegs:$R)>;
22
23
// Emits both directions of a bitconvert between two value types that are
// held in IntRegs. The result is the source register itself, retyped.
multiclass bitconvert_32<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a IntRegs:$Rs))), (b IntRegs:$Rs)>;
  // b -> a
  def : Pat<(a (bitconvert (b IntRegs:$Rs))), (a IntRegs:$Rs)>;
}
30
// Emits both directions of a bitconvert between two value types that are
// held in DoubleRegs. The result is the source register itself, retyped.
multiclass bitconvert_64<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a DoubleRegs:$Rss))), (b DoubleRegs:$Rss)>;
  // b -> a
  def : Pat<(a (bitconvert (b DoubleRegs:$Rss))), (a DoubleRegs:$Rss)>;
}
37
// Emits both directions of a bitconvert between two value types that are
// held in VectorRegs. The result is the source register itself, retyped.
multiclass bitconvert_vec<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a VectorRegs:$Vs))), (b VectorRegs:$Vs)>;
  // b -> a
  def : Pat<(a (bitconvert (b VectorRegs:$Vs))), (a VectorRegs:$Vs)>;
}
44
// Emits both directions of a bitconvert between two value types that are
// held in VecDblRegs. The result is the source register itself, retyped.
multiclass bitconvert_dblvec<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a VecDblRegs:$Vss))), (b VecDblRegs:$Vss)>;
  // b -> a
  def : Pat<(a (bitconvert (b VecDblRegs:$Vss))), (a VecDblRegs:$Vss)>;
}
51
// Emits bitconvert patterns between a type `a` associated with VecPredRegs
// and a type `b` associated with VectorRegs.
// NOTE(review): in both patterns the result dag names a different register
// class than the matched source operand (VecPredRegs vs. VectorRegs), unlike
// the other bitconvert_* multiclasses. No instantiation is visible in this
// part of the file -- confirm the intended classes before using it.
multiclass bitconvert_predvec<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a VecPredRegs:$Src))), (b VectorRegs:$Src)>;
  // b -> a
  def : Pat<(a (bitconvert (b VectorRegs:$Src))), (a VecPredRegs:$Src)>;
}
58
// Emits both directions of a bitconvert between two value types that are
// held in VecDblRegs128B. The result is the source register itself, retyped.
multiclass bitconvert_dblvec128B<ValueType a, ValueType b> {
  // a -> b
  def : Pat<(b (bitconvert (a VecDblRegs128B:$Vss))),
            (b VecDblRegs128B:$Vss)>;
  // b -> a
  def : Pat<(a (bitconvert (b VecDblRegs128B:$Vss))),
            (a VecDblRegs128B:$Vss)>;
}
65
// Bitconvert patterns for each supported pair of vector/scalar types.

// Types in IntRegs.
defm : bitconvert_32<v4i8,  i32>;
defm : bitconvert_32<v2i16, i32>;
defm : bitconvert_32<v2i16, v4i8>;

// Types in DoubleRegs.
defm : bitconvert_64<v8i8,  i64>;
defm : bitconvert_64<v4i16, i64>;
defm : bitconvert_64<v2i32, i64>;
defm : bitconvert_64<v8i8,  v4i16>;
defm : bitconvert_64<v8i8,  v2i32>;
defm : bitconvert_64<v4i16, v2i32>;

// Types in VectorRegs.
defm : bitconvert_vec<v64i8,  v16i32>;
defm : bitconvert_vec<v8i64,  v16i32>;
defm : bitconvert_vec<v32i16, v16i32>;

// Types in VecDblRegs.
defm : bitconvert_dblvec<v16i64, v128i8>;
defm : bitconvert_dblvec<v32i32, v128i8>;
defm : bitconvert_dblvec<v64i16, v128i8>;

// Types in VecDblRegs128B, converted to/from v128i16.
defm : bitconvert_dblvec128B<v64i32, v128i16>;
defm : bitconvert_dblvec128B<v256i8, v128i16>;
defm : bitconvert_dblvec128B<v32i64, v128i16>;

// Types in VecDblRegs128B, converted to/from v256i8.
defm : bitconvert_dblvec128B<v64i32,  v256i8>;
defm : bitconvert_dblvec128B<v32i64,  v256i8>;
defm : bitconvert_dblvec128B<v128i16, v256i8>;
93
// Vector shift support. Vector shifts on Hexagon differ from LLVM's
// internal representation.
// LLVM assumes every vector shift has the form
//   <VT> = SHL/SRA/SRL <VT> by <VT>
// whereas Hexagon uses the form
//   <VT> = SHL/SRA/SRL <VT> by <IT/i32>
// As a result, special care is needed to guarantee both correctness and
// performance.
// Shift-by-immediate instruction on a v4i16 value in DoubleRegs.
//   Op           - SDNode matched by the selection pattern.
//   Str          - string forwarded to S_2OpInstImm.
//   MajOp, MinOp - 3-bit fields forwarded to S_2OpInstImm.
// The shift amount is a u4 immediate, encoded in Inst{11-8}.
class vshift_v4i16<SDNode Op, string Str, bits<3> MajOp, bits<3> MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u4Imm,
                 [(set (v4i16 DoubleRegs:$dst),
                       (Op (v4i16 DoubleRegs:$src1), u4ImmPred:$src2))]> {
  bits<4> src2;
  let Inst{11-8} = src2;
}
109
// Shift-by-immediate instruction on a v2i32 value in DoubleRegs.
//   Op           - SDNode matched by the selection pattern.
//   Str          - string forwarded to S_2OpInstImm.
//   MajOp, MinOp - 3-bit fields forwarded to S_2OpInstImm.
// The shift amount is a u5 immediate, encoded in Inst{12-8}.
class vshift_v2i32<SDNode Op, string Str, bits<3> MajOp, bits<3> MinOp>
  : S_2OpInstImm<Str, MajOp, MinOp, u5Imm,
                 [(set (v2i32 DoubleRegs:$dst),
                       (Op (v2i32 DoubleRegs:$src1), u5ImmPred:$src2))]> {
  bits<5> src2;
  let Inst{12-8} = src2;
}
117
// v2i16 addition and subtraction on IntRegs select A2_svaddh / A2_svsubh.
def : Pat<(v2i16 (add (v2i16 IntRegs:$Rs), (v2i16 IntRegs:$Rt))),
          (A2_svaddh IntRegs:$Rs, IntRegs:$Rt)>;
def : Pat<(v2i16 (sub (v2i16 IntRegs:$Rs), (v2i16 IntRegs:$Rt))),
          (A2_svsubh IntRegs:$Rs, IntRegs:$Rt)>;
123
// Vector shift words by immediate.
def S2_asr_i_vw : vshift_v2i32<sra, "vasrw", 0b010, 0b000>;
def S2_lsr_i_vw : vshift_v2i32<srl, "vlsrw", 0b010, 0b001>;
def S2_asl_i_vw : vshift_v2i32<shl, "vaslw", 0b010, 0b010>;

// Vector shift halfwords by immediate.
def S2_asr_i_vh : vshift_v4i16<sra, "vasrh", 0b100, 0b000>;
def S2_lsr_i_vh : vshift_v4i16<srl, "vlsrh", 0b100, 0b001>;
def S2_asl_i_vh : vshift_v4i16<shl, "vaslh", 0b100, 0b010>;
131
132
// Target-specific splat nodes (unary).
def HexagonVSPLATB : SDNode<"HexagonISD::VSPLATB", SDTUnaryOp>;
def HexagonVSPLATH : SDNode<"HexagonISD::VSPLATH", SDTUnaryOp>;

// Replicate the low 8 bits of the 32-bit input register into each of the
// four bytes of the 32-bit destination register.
def : Pat<(v4i8 (HexagonVSPLATB I32:$Rs)),
          (S2_vsplatrb I32:$Rs)>;

// Replicate the low 16 bits of the 32-bit input register into each of the
// four halfwords of the 64-bit destination register.
def : Pat<(v4i16 (HexagonVSPLATH I32:$Rs)),
          (S2_vsplatrh I32:$Rs)>;
143
144
// Selects a two-operand node Op, with both operands matched by the same
// pattern fragment, to instruction MI applied to those operands in order.
class VArith_pat<InstHexagon MI, SDNode Op, PatFrag Val>
  : Pat<(Op Val:$Rss, Val:$Rtt), (MI Val:$Rss, Val:$Rtt)>;

// Element-wise add and subtract on 64-bit vectors.
def : VArith_pat<A2_vaddub, add, V8I8>;
def : VArith_pat<A2_vaddh,  add, V4I16>;
def : VArith_pat<A2_vaddw,  add, V2I32>;
def : VArith_pat<A2_vsubub, sub, V8I8>;
def : VArith_pat<A2_vsubh,  sub, V4I16>;
def : VArith_pat<A2_vsubw,  sub, V2I32>;

// Bitwise logic on v2i16 selects the A2_and / A2_xor / A2_or instructions.
def : VArith_pat<A2_and, and, V2I16>;
def : VArith_pat<A2_xor, xor, V2I16>;
def : VArith_pat<A2_or,  or,  V2I16>;

// Bitwise logic on 64-bit vectors selects A2_andp / A2_orp / A2_xorp for
// every element type.
def : VArith_pat<A2_andp, and, V8I8>;
def : VArith_pat<A2_andp, and, V4I16>;
def : VArith_pat<A2_andp, and, V2I32>;
def : VArith_pat<A2_orp,  or,  V8I8>;
def : VArith_pat<A2_orp,  or,  V4I16>;
def : VArith_pat<A2_orp,  or,  V2I32>;
def : VArith_pat<A2_xorp, xor, V8I8>;
def : VArith_pat<A2_xorp, xor, V4I16>;
def : VArith_pat<A2_xorp, xor, V2I32>;
169
// Generic v2i32 shifts whose shift amount is an i64 HexagonCOMBINE of the
// same u5 immediate in both operands select the immediate word shifts.
def : Pat<(v2i32 (sra V2I32:$Rss,
                      (i64 (HexagonCOMBINE (i32 u5ImmPred:$u5),
                                           (i32 u5ImmPred:$u5))))),
          (S2_asr_i_vw V2I32:$Rss, imm:$u5)>;
def : Pat<(v2i32 (srl V2I32:$Rss,
                      (i64 (HexagonCOMBINE (i32 u5ImmPred:$u5),
                                           (i32 u5ImmPred:$u5))))),
          (S2_lsr_i_vw V2I32:$Rss, imm:$u5)>;
def : Pat<(v2i32 (shl V2I32:$Rss,
                      (i64 (HexagonCOMBINE (i32 u5ImmPred:$u5),
                                           (i32 u5ImmPred:$u5))))),
          (S2_asl_i_vw V2I32:$Rss, imm:$u5)>;

// Generic v4i16 shifts whose shift amount is a HexagonVSPLATH of a u4
// immediate select the immediate halfword shifts.
def : Pat<(v4i16 (sra V4I16:$Rss,
                      (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$u4)))))),
          (S2_asr_i_vh V4I16:$Rss, imm:$u4)>;
def : Pat<(v4i16 (srl V4I16:$Rss,
                      (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$u4)))))),
          (S2_lsr_i_vh V4I16:$Rss, imm:$u4)>;
def : Pat<(v4i16 (shl V4I16:$Rss,
                      (v4i16 (HexagonVSPLATH (i32 (u4ImmPred:$u4)))))),
          (S2_asl_i_vh V4I16:$Rss, imm:$u4)>;
186
187
// Type profiles for the target vector-shift nodes: one result and two
// operands, where the result and operand 1 share the given vector type and
// operand 2 is an integer.
def SDTHexagon_v2i32_v2i32_i32
  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, v2i32>,
                         SDTCisInt<2>]>;
def SDTHexagon_v4i16_v4i16_i32
  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisVT<0, v4i16>,
                         SDTCisInt<2>]>;

// Target vector-shift nodes: W variants use the v2i32 profile, H variants
// use the v4i16 profile.
def HexagonVSRAW : SDNode<"HexagonISD::VSRAW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRAH : SDNode<"HexagonISD::VSRAH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSRLW : SDNode<"HexagonISD::VSRLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSRLH : SDNode<"HexagonISD::VSRLH", SDTHexagon_v4i16_v4i16_i32>;
def HexagonVSHLW : SDNode<"HexagonISD::VSHLW", SDTHexagon_v2i32_v2i32_i32>;
def HexagonVSHLH : SDNode<"HexagonISD::VSHLH", SDTHexagon_v4i16_v4i16_i32>;
199
// Target vector-shift nodes with an immediate shift amount select the
// shift-by-immediate instructions (u5 for words, u4 for halfwords).
def : Pat<(v2i32 (HexagonVSRAW V2I32:$Rss, u5ImmPred:$u5)),
          (S2_asr_i_vw V2I32:$Rss, imm:$u5)>;
def : Pat<(v4i16 (HexagonVSRAH V4I16:$Rss, u4ImmPred:$u4)),
          (S2_asr_i_vh V4I16:$Rss, imm:$u4)>;
def : Pat<(v2i32 (HexagonVSRLW V2I32:$Rss, u5ImmPred:$u5)),
          (S2_lsr_i_vw V2I32:$Rss, imm:$u5)>;
def : Pat<(v4i16 (HexagonVSRLH V4I16:$Rss, u4ImmPred:$u4)),
          (S2_lsr_i_vh V4I16:$Rss, imm:$u4)>;
def : Pat<(v2i32 (HexagonVSHLW V2I32:$Rss, u5ImmPred:$u5)),
          (S2_asl_i_vw V2I32:$Rss, imm:$u5)>;
def : Pat<(v4i16 (HexagonVSHLH V4I16:$Rss, u4ImmPred:$u4)),
          (S2_asl_i_vh V4I16:$Rss, imm:$u4)>;
212
// Vector shift words by register.
def S2_asr_r_vw : T_S3op_shiftVect<"vasrw", 0b00, 0b00>;
def S2_lsr_r_vw : T_S3op_shiftVect<"vlsrw", 0b00, 0b01>;
def S2_asl_r_vw : T_S3op_shiftVect<"vaslw", 0b00, 0b10>;
def S2_lsl_r_vw : T_S3op_shiftVect<"vlslw", 0b00, 0b11>;

// Vector shift halfwords by register.
def S2_asr_r_vh : T_S3op_shiftVect<"vasrh", 0b01, 0b00>;
def S2_lsr_r_vh : T_S3op_shiftVect<"vlsrh", 0b01, 0b01>;
def S2_asl_r_vh : T_S3op_shiftVect<"vaslh", 0b01, 0b10>;
def S2_lsl_r_vh : T_S3op_shiftVect<"vlslh", 0b01, 0b11>;
224
// Selects a vector-shift node whose shift amount is an I32 value to the
// shift-by-register instruction MI.
class vshift_rr_pat<InstHexagon MI, SDNode Op, PatFrag Val>
  : Pat<(Op Val:$Rss, I32:$Rt), (MI Val:$Rss, I32:$Rt)>;

def : vshift_rr_pat<S2_asr_r_vw, HexagonVSRAW, V2I32>;
def : vshift_rr_pat<S2_asr_r_vh, HexagonVSRAH, V4I16>;
def : vshift_rr_pat<S2_lsr_r_vw, HexagonVSRLW, V2I32>;
def : vshift_rr_pat<S2_lsr_r_vh, HexagonVSRLH, V4I16>;
def : vshift_rr_pat<S2_asl_r_vw, HexagonVSHLW, V2I32>;
def : vshift_rr_pat<S2_asl_r_vh, HexagonVSHLH, V4I16>;
235
236
// Type profiles for the target vector-compare nodes: an i1 result and two
// operands of the same vector type.
def SDTHexagonVecCompare_v8i8
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>,
                         SDTCisVT<1, v8i8>]>;
def SDTHexagonVecCompare_v4i16
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>,
                         SDTCisVT<1, v4i16>]>;
def SDTHexagonVecCompare_v2i32
  : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, SDTCisVT<0, i1>,
                         SDTCisVT<1, v2i32>]>;

// Compare nodes on v8i8.
def HexagonVCMPBEQ  : SDNode<"HexagonISD::VCMPBEQ",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGT  : SDNode<"HexagonISD::VCMPBGT",  SDTHexagonVecCompare_v8i8>;
def HexagonVCMPBGTU : SDNode<"HexagonISD::VCMPBGTU", SDTHexagonVecCompare_v8i8>;

// Compare nodes on v4i16.
def HexagonVCMPHEQ  : SDNode<"HexagonISD::VCMPHEQ",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGT  : SDNode<"HexagonISD::VCMPHGT",  SDTHexagonVecCompare_v4i16>;
def HexagonVCMPHGTU : SDNode<"HexagonISD::VCMPHGTU", SDTHexagonVecCompare_v4i16>;

// Compare nodes on v2i32.
def HexagonVCMPWEQ  : SDNode<"HexagonISD::VCMPWEQ",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGT  : SDNode<"HexagonISD::VCMPWGT",  SDTHexagonVecCompare_v2i32>;
def HexagonVCMPWGTU : SDNode<"HexagonISD::VCMPWGTU", SDTHexagonVecCompare_v2i32>;
253
254
// Selects a target vector-compare node with an i1 result to the compare
// instruction MI, keeping the operand order.
class vcmp_i1_pat<InstHexagon MI, SDNode Op, PatFrag Val>
  : Pat<(i1 (Op Val:$Rss, Val:$Rtt)), (MI Val:$Rss, Val:$Rtt)>;

// Byte compares.
def : vcmp_i1_pat<A2_vcmpbeq,  HexagonVCMPBEQ,  V8I8>;
def : vcmp_i1_pat<A4_vcmpbgt,  HexagonVCMPBGT,  V8I8>;
def : vcmp_i1_pat<A2_vcmpbgtu, HexagonVCMPBGTU, V8I8>;

// Halfword compares.
def : vcmp_i1_pat<A2_vcmpheq,  HexagonVCMPHEQ,  V4I16>;
def : vcmp_i1_pat<A2_vcmphgt,  HexagonVCMPHGT,  V4I16>;
def : vcmp_i1_pat<A2_vcmphgtu, HexagonVCMPHGTU, V4I16>;

// Word compares.
def : vcmp_i1_pat<A2_vcmpweq,  HexagonVCMPWEQ,  V2I32>;
def : vcmp_i1_pat<A2_vcmpwgt,  HexagonVCMPWGT,  V2I32>;
def : vcmp_i1_pat<A2_vcmpwgtu, HexagonVCMPWGTU, V2I32>;
270
271
// Selects a generic compare fragment (seteq/setgt/setugt) that produces a
// vector-of-i1 result of type OutTy to the compare instruction MI, keeping
// the operand order.
class vcmp_vi1_pat<InstHexagon MI, PatFrag Op, PatFrag InVal, ValueType OutTy>
  : Pat<(OutTy (Op InVal:$Rss, InVal:$Rtt)), (MI InVal:$Rss, InVal:$Rtt)>;

// v2i32 compares producing v2i1.
def : vcmp_vi1_pat<A2_vcmpweq,  seteq,  V2I32, v2i1>;
def : vcmp_vi1_pat<A2_vcmpwgt,  setgt,  V2I32, v2i1>;
def : vcmp_vi1_pat<A2_vcmpwgtu, setugt, V2I32, v2i1>;

// v4i16 compares producing v4i1.
def : vcmp_vi1_pat<A2_vcmpheq,  seteq,  V4I16, v4i1>;
def : vcmp_vi1_pat<A2_vcmphgt,  setgt,  V4I16, v4i1>;
def : vcmp_vi1_pat<A2_vcmphgtu, setugt, V4I16, v4i1>;
283
284
// Hexagon doesn't have a vector multiply with C semantics.
// Instead, generate a pseudo instruction that gets expanded into two
// scalar MPYI instructions.
// This is expanded by ExpandPostRAPseudos.
let isPseudo = 1 in
def VMULW
  : PseudoM<(outs DoubleRegs:$Rd),
            (ins DoubleRegs:$Rs, DoubleRegs:$Rt),
            ".error \"Should never try to emit VMULW\"",
            [(set V2I32:$Rd, (mul V2I32:$Rs, V2I32:$Rt))]>;

// Multiply-accumulate form of the pseudo above: matches Rx + Rs * Rt on
// v2i32, with the constraint string "$Rd = $Rx".
let isPseudo = 1 in
def VMULW_ACC
  : PseudoM<(outs DoubleRegs:$Rd),
            (ins DoubleRegs:$Rx, DoubleRegs:$Rs, DoubleRegs:$Rt),
            ".error \"Should never try to emit VMULW_ACC\"",
            [(set V2I32:$Rd, (add V2I32:$Rx, (mul V2I32:$Rs, V2I32:$Rt)))],
            "$Rd = $Rx">;
301
// Hexagon has no instruction that adds two v4i8 values, so use the v8i8
// add on the Zext64'd operands and keep only the low part of the result.
def : Pat<(v4i8 (add (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
          (LoReg (A2_vaddub (Zext64 $Rs), (Zext64 $Rt)))>;

// Likewise for subtraction: use the v8i8 subtract on the Zext64'd operands
// and keep only the low part of the result.
def : Pat<(v4i8 (sub (v4i8 IntRegs:$Rs), (v4i8 IntRegs:$Rt))),
          (LoReg (A2_vsubub (Zext64 $Rs), (Zext64 $Rt)))>;
311
//
// No 32 bit vector mux: select on an i1 condition goes through C2_vmux on
// the Zext64'd operands, and the low part of the result is kept.
//
def : Pat<(v4i8 (select I1:$Pu, V4I8:$Rs, V4I8:$Rt)),
          (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;
def : Pat<(v2i16 (select I1:$Pu, V2I16:$Rs, V2I16:$Rt)),
          (LoReg (C2_vmux I1:$Pu, (Zext64 $Rs), (Zext64 $Rt)))>;

//
// 64-bit vector mux: vselect with a vector-of-i1 condition maps directly
// onto C2_vmux.
//
def : Pat<(v8i8 (vselect V8I1:$Pu, V8I8:$Rss, V8I8:$Rtt)),
          (C2_vmux V8I1:$Pu, V8I8:$Rss, V8I8:$Rtt)>;
def : Pat<(v4i16 (vselect V4I1:$Pu, V4I16:$Rss, V4I16:$Rtt)),
          (C2_vmux V4I1:$Pu, V4I16:$Rss, V4I16:$Rtt)>;
def : Pat<(v2i32 (vselect V2I1:$Pu, V2I32:$Rss, V2I32:$Rtt)),
          (C2_vmux V2I1:$Pu, V2I32:$Rss, V2I32:$Rtt)>;
329
//
// No 32 bit vector compare: both operands go through Zext64 and the
// 64-bit compare instruction is used.
//

// v4i8 compares use the byte compares.
def : Pat<(i1 (seteq V4I8:$Rs, V4I8:$Rt)),
          (A2_vcmpbeq (Zext64 $Rs), (Zext64 $Rt))>;
def : Pat<(i1 (setgt V4I8:$Rs, V4I8:$Rt)),
          (A4_vcmpbgt (Zext64 $Rs), (Zext64 $Rt))>;
def : Pat<(i1 (setugt V4I8:$Rs, V4I8:$Rt)),
          (A2_vcmpbgtu (Zext64 $Rs), (Zext64 $Rt))>;

// v2i16 compares use the halfword compares.
def : Pat<(i1 (seteq V2I16:$Rs, V2I16:$Rt)),
          (A2_vcmpheq (Zext64 $Rs), (Zext64 $Rt))>;
def : Pat<(i1 (setgt V2I16:$Rs, V2I16:$Rt)),
          (A2_vcmphgt (Zext64 $Rs), (Zext64 $Rt))>;
def : Pat<(i1 (setugt V2I16:$Rs, V2I16:$Rt)),
          (A2_vcmphgtu (Zext64 $Rs), (Zext64 $Rt))>;
346
347
// Selects compare CmpOp(Rs, Rt), with result type CmpTy, to instruction
// InvMI with the two operands swapped: InvMI(Rt, Rs).
class InvertCmp_pat<InstHexagon InvMI, PatFrag CmpOp, PatFrag Val,
                    ValueType CmpTy>
  : Pat<(CmpTy (CmpOp Val:$Rss, Val:$Rtt)),
        (InvMI Val:$Rtt, Val:$Rss)>;

// Map a less-than compare onto the greater-than instruction with the
// operands reversed, e.g.  x < y --> cmp.gt(y, x).

// Signed: setlt -> vcmp*gt.
def : InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  i1>;
def : InvertCmp_pat<A4_vcmpbgt,  setlt,  V8I8,  v8i1>;
def : InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, i1>;
def : InvertCmp_pat<A2_vcmphgt,  setlt,  V4I16, v4i1>;
def : InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, i1>;
def : InvertCmp_pat<A2_vcmpwgt,  setlt,  V2I32, v2i1>;

// Unsigned: setult -> vcmp*gtu.
def : InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  i1>;
def : InvertCmp_pat<A2_vcmpbgtu, setult, V8I8,  v8i1>;
def : InvertCmp_pat<A2_vcmphgtu, setult, V4I16, i1>;
def : InvertCmp_pat<A2_vcmphgtu, setult, V4I16, v4i1>;
def : InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, i1>;
def : InvertCmp_pat<A2_vcmpwgtu, setult, V2I32, v2i1>;
368
// Map v2i32 inequality onto the negated word-equality compare:
//   rs != rt -> !(rs == rt).
// The equality must be the word compare (A2_vcmpweq), the same instruction
// this file selects for seteq on V2I32 with a v2i1 result. The byte compare
// (A2_vcmpbeq) tests each byte separately, so it does not produce a
// per-word result for v2i32 operands.
def: Pat<(v2i1 (setne V2I32:$Rs, V2I32:$Rt)),
         (C2_not (v2i1 (A2_vcmpweq V2I32:$Rs, V2I32:$Rt)))>;
373
374
// Truncate: from vector B copy all 'E'ven 'B'yte elements:
//   A[0] = B[0];  A[1] = B[2];  A[2] = B[4];  A[3] = B[6];
def : Pat<(v4i8 (trunc V4I16:$Rss)),
          (S2_vtrunehb V4I16:$Rss)>;

// Truncate: from vector B copy all 'O'dd 'B'yte elements:
//   A[0] = B[1];  A[1] = B[3];  A[2] = B[5];  A[3] = B[7];
// S2_vtrunohb (no pattern defined here)

// Truncate: from vectors B and C copy all 'E'ven 'H'alf-word elements:
//   A[0] = B[0];  A[1] = B[2];  A[2] = C[0];  A[3] = C[2];
// S2_vtruneh (no pattern defined here)

// v2i32 -> v2i16 truncate: S2_packhl on the high and low registers of the
// source, keeping the low register of the result.
def : Pat<(v2i16 (trunc V2I32:$Rss)),
          (LoReg (S2_packhl (HiReg $Rss), (LoReg $Rss)))>;
390
391
// Target sign-extension nodes (unary).
def HexagonVSXTBH : SDNode<"HexagonISD::VSXTBH", SDTUnaryOp>;
def HexagonVSXTBW : SDNode<"HexagonISD::VSXTBW", SDTUnaryOp>;

// Both nodes take an I32 operand and produce an i64 result.
def : Pat<(i64 (HexagonVSXTBH I32:$Rs)),
          (S2_vsxtbh I32:$Rs)>;
// NOTE(review): VSXTBW selects S2_vsxthw, the instruction this file also
// uses for sext of V2I16 to v2i32 -- confirm this mapping is intended.
def : Pat<(i64 (HexagonVSXTBW I32:$Rs)),
          (S2_vsxthw I32:$Rs)>;
397
// Zero extension: v4i8 -> v4i16 and v2i16 -> v2i32.
def : Pat<(v4i16 (zext V4I8:$Rs)),    (S2_vzxtbh V4I8:$Rs)>;
def : Pat<(v2i32 (zext V2I16:$Rs)),   (S2_vzxthw V2I16:$Rs)>;

// Any-extension is selected to the same instructions as zero extension.
def : Pat<(v4i16 (anyext V4I8:$Rs)),  (S2_vzxtbh V4I8:$Rs)>;
def : Pat<(v2i32 (anyext V2I16:$Rs)), (S2_vzxthw V2I16:$Rs)>;

// Sign extension: v4i8 -> v4i16 and v2i16 -> v2i32.
def : Pat<(v4i16 (sext V4I8:$Rs)),    (S2_vsxtbh V4I8:$Rs)>;
def : Pat<(v2i32 (sext V2I16:$Rs)),   (S2_vsxthw V2I16:$Rs)>;
404
// Sign extends a v2i8 into a v2i32: apply A2_sxtb to the high and low
// registers separately and recombine them with A2_combinew.
def : Pat<(v2i32 (sext_inreg V2I32:$Rss, v2i8)),
          (A2_combinew (A2_sxtb (HiReg $Rss)),
                       (A2_sxtb (LoReg $Rss)))>;

// Sign extends a v2i16 into a v2i32: same scheme, using A2_sxth.
def : Pat<(v2i32 (sext_inreg V2I32:$Rss, v2i16)),
          (A2_combinew (A2_sxth (HiReg $Rss)),
                       (A2_sxth (LoReg $Rss)))>;
412
413
// Multiplies two v2i16 and returns a v2i32. This uses the saturating
// multiply, as Hexagon does not provide a non-saturating vector multiply;
// saturation does not affect the result because it is held in double the
// precision of the operands.
def vmpyh : OutPatFrag<(ops node:$Rs, node:$Rt),
                       (M2_vmpy2s_s0 (i32 $Rs), (i32 $Rt))>;

// Multiplies two v2i16 vectors: as Hexagon does not have a multiply with
// the C semantics for this one, this pattern uses the half-word multiply
// vmpyh, which takes two v2i16 and returns a v2i32. The product is then
// truncated back into a v2i16 to simulate the wrap-around semantics of
// unsigned arithmetic in C.
def : Pat<(v2i16 (mul V2I16:$Rs, V2I16:$Rt)),
          (LoReg (S2_vtrunewh (v2i32 (A2_combineii 0, 0)),
                              (v2i32 (vmpyh V2I16:$Rs, V2I16:$Rt))))>;
430
// Multiplies two v4i16 vectors: vmpyh is applied to the high registers and
// to the low registers of the operands, and S2_vtrunewh combines the two
// products into the v4i16 result.
def : Pat<(v4i16 (mul V4I16:$Rss, V4I16:$Rtt)),
          (S2_vtrunewh (vmpyh (HiReg $Rss), (HiReg $Rtt)),
                       (vmpyh (LoReg $Rss), (LoReg $Rtt)))>;
435
// Byte multiply built without M5_vmpybsu: both operands go through
// S2_vsxtbh, the high and low registers are multiplied with vmpyh, and the
// two products are combined with S2_vtrunewh.
def VMPYB_no_V5 : OutPatFrag<(ops node:$Rs, node:$Rt),
  (S2_vtrunewh (vmpyh (HiReg (S2_vsxtbh $Rs)), (HiReg (S2_vsxtbh $Rt))),
               (vmpyh (LoReg (S2_vsxtbh $Rs)), (LoReg (S2_vsxtbh $Rt))))>;

// Multiplies two v4i8 vectors.
// With HasV5T: M5_vmpybsu followed by S2_vtrunehb.
def : Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
          (S2_vtrunehb (M5_vmpybsu V4I8:$Rs, V4I8:$Rt))>,
      Requires<[HasV5T]>;

// Without the predicate: VMPYB_no_V5 followed by S2_vtrunehb.
def : Pat<(v4i8 (mul V4I8:$Rs, V4I8:$Rt)),
          (S2_vtrunehb (VMPYB_no_V5 V4I8:$Rs, V4I8:$Rt))>;

// Multiplies two v8i8 vectors: the high and low registers are multiplied
// separately as above, and the results are recombined with A2_combinew.
// With HasV5T:
def : Pat<(v8i8 (mul V8I8:$Rss, V8I8:$Rtt)),
          (A2_combinew
            (S2_vtrunehb (M5_vmpybsu (HiReg $Rss), (HiReg $Rtt))),
            (S2_vtrunehb (M5_vmpybsu (LoReg $Rss), (LoReg $Rtt))))>,
      Requires<[HasV5T]>;

// Without the predicate:
def : Pat<(v8i8 (mul V8I8:$Rss, V8I8:$Rtt)),
          (A2_combinew
            (S2_vtrunehb (VMPYB_no_V5 (HiReg $Rss), (HiReg $Rtt))),
            (S2_vtrunehb (VMPYB_no_V5 (LoReg $Rss), (LoReg $Rtt))))>;
457
458
// Instruction class for a two-operand shuffle on DoubleRegs.
//   Op  - SDNode matched on i64 operands, producing the i64 result.
//   Str - mnemonic spliced into the assembly string "$a = Str($b, $c)".
class shuffler<SDNode Op, string Str>
  : SInst<(outs DoubleRegs:$a),
          (ins DoubleRegs:$b, DoubleRegs:$c),
          "$a = " # Str # "($b, $c)",
          [(set (i64 DoubleRegs:$a),
                (i64 (Op (i64 DoubleRegs:$b), (i64 DoubleRegs:$c))))],
          "", S_3op_tc_1_SLOT23>;
465
// Type profile for a binary operation whose result and both operands are
// i64.
def SDTHexagonBinOp64
  : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                         SDTCisVT<0, i64>]>;

// Target shuffle nodes, all using the i64 binary profile.
def HexagonSHUFFEB : SDNode<"HexagonISD::SHUFFEB", SDTHexagonBinOp64>;
def HexagonSHUFFEH : SDNode<"HexagonISD::SHUFFEH", SDTHexagonBinOp64>;
def HexagonSHUFFOB : SDNode<"HexagonISD::SHUFFOB", SDTHexagonBinOp64>;
def HexagonSHUFFOH : SDNode<"HexagonISD::SHUFFOH", SDTHexagonBinOp64>;
473
// Selects the i64 shuffle node Op to instruction MI, keeping the operand
// order.
class ShufflePat<InstHexagon MI, SDNode Op>
  : Pat<(i64 (Op DoubleRegs:$Rss, DoubleRegs:$Rtt)),
        (i64 (MI DoubleRegs:$Rss, DoubleRegs:$Rtt))>;

// Shuffles even bytes for i=0..3:
//   A[2*i].b = C[2*i].b; A[2*i+1].b = B[2*i].b
def : ShufflePat<S2_shuffeb, HexagonSHUFFEB>;

// Shuffles odd bytes for i=0..3:
//   A[2*i].b = C[2*i+1].b; A[2*i+1].b = B[2*i+1].b
def : ShufflePat<S2_shuffob, HexagonSHUFFOB>;

// Shuffles even half for i=0,1:
//   A[2*i].h = C[2*i].h; A[2*i+1].h = B[2*i].h
def : ShufflePat<S2_shuffeh, HexagonSHUFFEH>;

// Shuffles odd half for i=0,1:
//   A[2*i].h = C[2*i+1].h; A[2*i+1].h = B[2*i+1].h
def : ShufflePat<S2_shuffoh, HexagonSHUFFOH>;
489
490
// Truncated store from v4i16 to v4i8: a truncstore whose memory type is
// v4i8.
def truncstorev4i8
  : PatFrag<(ops node:$val, node:$ptr),
            (truncstore node:$val, node:$ptr),
            [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v4i8; }]>;

// Truncated store from v2i32 to v2i16: a truncstore whose memory type is
// v2i16.
def truncstorev2i16
  : PatFrag<(ops node:$val, node:$ptr),
            (truncstore node:$val, node:$ptr),
            [{ return cast<StoreSDNode>(N)->getMemoryVT() == MVT::v2i16; }]>;

// v2i32 -> v2i16 store: pack with S2_packhl, then store the low register
// of the result with S2_storeri_io at offset 0.
def : Pat<(truncstorev2i16 V2I32:$Rss, I32:$Rt),
          (S2_storeri_io I32:$Rt, 0,
                         (LoReg (S2_packhl (HiReg $Rss), (LoReg $Rss))))>;

// v4i16 -> v4i8 store: truncate with S2_vtrunehb, then store the result
// with S2_storeri_io at offset 0.
def : Pat<(truncstorev4i8 V4I16:$Rss, I32:$Rt),
          (S2_storeri_io I32:$Rt, 0, (S2_vtrunehb V4I16:$Rss))>;
507
508
// Zero- and sign-extending loads whose memory type is v2i8.
def zextloadv2i8
  : PatFrag<(ops node:$ptr), (zextload node:$ptr),
            [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

def sextloadv2i8
  : PatFrag<(ops node:$ptr), (sextload node:$ptr),
            [{ return cast<LoadSDNode>(N)->getMemoryVT() == MVT::v2i8; }]>;

// v2i8 -> v2i16: load with L2_loadruh_io / L2_loadrh_io at offset 0, extend
// with S2_vzxtbh / S2_vsxtbh, and keep the low register of the v4i16 result.
def : Pat<(v2i16 (zextloadv2i8 I32:$Rs)),
          (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0))))>;

def : Pat<(v2i16 (sextloadv2i8 I32:$Rs)),
          (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0))))>;

// v2i8 -> v2i32: as above, then extend the resulting v2i16 with
// S2_vzxthw / S2_vsxthw.
def : Pat<(v2i32 (zextloadv2i8 I32:$Rs)),
          (S2_vzxthw
            (LoReg (v4i16 (S2_vzxtbh (L2_loadruh_io I32:$Rs, 0)))))>;

def : Pat<(v2i32 (sextloadv2i8 I32:$Rs)),
          (S2_vsxthw
            (LoReg (v4i16 (S2_vsxtbh (L2_loadrh_io I32:$Rs, 0)))))>;
527