1//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
2//
3//                     Cell SPU 64-bit operations
4//
5//===----------------------------------------------------------------------===//
6
7//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
8// 64-bit comparisons:
9//
// 1. The instruction sequences for the vector and scalar cases differ only
//    by a constant. In the scalar case, we're only interested in the
//    top two 32-bit slots, whereas in the vector case we're interested in
//    an exact all-four-slot match.
14//
15// 2. There are no "immediate" forms, since loading 64-bit constants
16//    could be a constant pool load.
17//
18// 3. i64 setcc results are i32, which are subsequently converted to a FSM
19//    mask when used in a select pattern.
20//
21// 4. v2i64 setcc results are v4i32, which can be converted to a FSM mask (TODO)
22//    [Note: this may be moot, since gb produces v4i32 or r32.]
23//
24// 5. The code sequences for r64 and v2i64 are probably overly conservative,
25//    compared to the code that gcc produces.
26//
27// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
28//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
29
// 64-bit scalar form of selb. It carries no selection pattern of its own;
// the Pat<> definitions in this file instantiate it explicitly. The mask
// operand ($rC) lives in a vector register and is produced by one of the
// FSM variants.
def SELBr64_cond:
  SELBInst<(outs R64C:$rT),
           (ins R64C:$rA, R64C:$rB, VECREG:$rC),
           []>;
35
// Generic i64 select. The condition arrives in a 32-bit register and is
// expanded into a selb mask with FSMr32. Note the operand order: the value
// picked when the condition is set is handed to SELBr64_cond as its second
// source, the other value as its first source.

def : Pat<(select R32C:$rCond, R64C:$rIfSet, R64C:$rIfClear),
          (SELBr64_cond R64C:$rIfClear, R64C:$rIfSet,
                        (FSMr32 R32C:$rCond))>;
42
// Select driven by a negated comparison. "cond" is the condition being
// matched; "compare" is the code fragment that is actually evaluated (the
// instantiations in this file pass the complementary comparison), and its
// result is turned into the selb mask through FSMr32.
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)),
              R64C:$rTrue, R64C:$rFalse),
      (SELBr64_cond R64C:$rTrue, R64C:$rFalse,
                    (FSMr32 compare.Fragment))>;
47
// setcc for a negated comparison: evaluate the "compare" fragment and flip
// every bit of its 32-bit result by XOR-ing with -1.
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB), (XORIr32 compare.Fragment, -1)>;
52
53//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 seteq fragment: both scalars are moved into vector registers, compared
// word-wise, the per-word results are gathered, and the gathered value is
// tested against 0xb (only the top two 32-bit slots matter for a scalar).
def CEQr64compare:
    CodeFrag<(CGTIv4i32
                (GBv4i32
                  (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                            (COPY_TO_REGCLASS R64C:$rB, VECREG))),
                0xb)>;
59
// v2i64 seteq fragment: word-wise compare, gather, and require an exact
// all-four-slot match (0xf).
def CEQv2i64compare:
    CodeFrag<(CEQIv4i32
                (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
                0xf)>;
63
// Equality fragments for i64 and v2i64.
//
// The "r64"/"v2i64" members copy the comparison result back into a 32-bit
// scalar register. The "*mask" members first run the result through FSM so
// that it can be used directly as a selb mask.
multiclass CompareEqual64 {
  // Comparison result as an i32 scalar:
  def r64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
  def v2i64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;

  // Comparison result expanded to a SELB mask by FSM:
  def r64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CEQr64compare.Fragment),
                                    R32C))>;
  def v2i64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CEQv2i64compare.Fragment),
                                    R32C))>;
}
79
// Instantiate the equality fragments and hook them up to seteq for both the
// scalar and the vector operand types.
defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB),
          I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
          I64EQv2i64.Fragment>;
84
// i64 setne, expressed as the negation of the seteq fragment (both the
// setcc and the select forms):
def : I64SETCCNegCond <setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;
88
89//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
90// i64 setugt/setule:
91//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
92
// Word-wise unsigned greater-than of two i64 scalars (moved to vector
// registers first).
def CLGTr64ugt:
    CodeFrag<(CLGTv4i32
                (COPY_TO_REGCLASS R64C:$rA, VECREG),
                (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Word-wise equality of the same two i64 scalars.
def CLGTr64eq:
    CodeFrag<(CEQv4i32
                (COPY_TO_REGCLASS R64C:$rA, VECREG),
                (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
100
// i64 unsigned greater-than: selb picks between the word-wise result and its
// XSWD-extended form, using the word-wise equality result as the mask.
def CLGTr64compare:
    CodeFrag<(SELBv2i64
                CLGTr64ugt.Fragment,
                (XSWDv2i64 CLGTr64ugt.Fragment),
                CLGTr64eq.Fragment)>;
105
// Word-wise unsigned greater-than on vector operands.
def CLGTv2i64ugt: CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

// Word-wise equality on vector operands.
def CLGTv2i64eq: CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
111
// v2i64 unsigned greater-than: same shape as CLGTr64compare, but built
// entirely from the vector-operand fragments.
//
// The XSWD operand must be the *vector* greater-than fragment. The scalar
// fragment (CLGTr64ugt) wraps $rA/$rB in R64C -> VECREG copies, which does
// not match the VECREG operands used by the other two selb sources here.
def CLGTv2i64compare:
    CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CLGTv2i64eq.Fragment)>;
116
// Unsigned ("logical") greater-than fragments for i64 and v2i64.
multiclass CompareLogicalGreaterThan64 {
  // Scalar result is copied back to an i32 register; the vector result is
  // used as-is.
  def r64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
  def v2i64:
    CodeFrag<CLGTv2i64compare.Fragment>;

  // Results expanded to a SELB mask by FSM:
  def r64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CLGTr64compare.Fragment),
                                    R32C))>;
  def v2i64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CLGTv2i64compare.Fragment),
                                    R32C))>;
}
128
// Instantiate the unsigned greater-than fragments and match i64 setugt.
// The v2i64 setugt pattern is intentionally left disabled.
defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB),
          I64LGTr64.Fragment>;
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//          I64LGTv2i64.Fragment>;
134
// i64 setule, expressed as the negation of setugt (setcc and select forms):
def : I64SETCCNegCond <setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;
138
139//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
140// i64 setuge/setult:
141//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
142
// i64 unsigned greater-or-equal.
//
// a >= b holds when a > b or a == b as full 64-bit values, so the result is
// the OR of the 64-bit greater-than fragment and the 64-bit equality
// fragment. Both produce an all-ones/all-zeros value in the preferred
// (first) word slot, which is the slot read by the i32 copy and by FSM.
//
// The previous sequence OR-ed the *word-wise* ugt/eq results and then
// required both of the top two slots to be set (gb + cgti 0xb). That tests
// "high word >= AND low word >=", which rejects cases such as
// 0x00000002_00000000 >= 0x00000001_00000005 where the high word is
// strictly greater but the low word is smaller.
def CLGEr64compare:
    CodeFrag<(ORv4i32 CLGTr64compare.Fragment, CEQr64compare.Fragment)>;
146
// v2i64 unsigned greater-or-equal: OR the word-wise ugt and eq results,
// gather, and require an exact all-four-slot match (0xf).
// NOTE(review): this appears to require every 32-bit slot to satisfy >= on
// its own, which is not the same as a per-doubleword 64-bit ordering --
// confirm the intended semantics before relying on it.
def CLGEv2i64compare:
    CodeFrag<(CEQIv4i32
                (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                  CLGTv2i64eq.Fragment)),
                0xf)>;
150
// Unsigned ("logical") greater-or-equal fragments for i64 and v2i64.
multiclass CompareLogicalGreaterEqual64 {
  // Scalar result is copied back to an i32 register; the vector result is
  // used as-is.
  def r64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
  def v2i64:
    CodeFrag<CLGEv2i64compare.Fragment>;

  // Results expanded to a SELB mask by FSM:
  def r64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CLGEr64compare.Fragment),
                                    R32C))>;
  def v2i64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CLGEv2i64compare.Fragment),
                                    R32C))>;
}
162
// Instantiate the unsigned greater-or-equal fragments and match setuge for
// both the scalar and the vector operand types.
defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB),
          I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;
168
169
// i64 setult, expressed as the negation of setuge (setcc and select forms):
def : I64SETCCNegCond <setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;
173
174//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
175// i64 setgt/setle:
176//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
177
// Word-wise signed greater-than of two i64 scalars (moved to vector
// registers first).
def CGTr64sgt:
    CodeFrag<(CGTv4i32
                (COPY_TO_REGCLASS R64C:$rA, VECREG),
                (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Word-wise equality of the same two i64 scalars.
def CGTr64eq:
    CodeFrag<(CEQv4i32
                (COPY_TO_REGCLASS R64C:$rA, VECREG),
                (COPY_TO_REGCLASS R64C:$rB, VECREG))>;
185
// i64 signed greater-than.
//
// Only the high word of a two's-complement 64-bit value carries the sign;
// when the high words are equal the low words must be ordered as unsigned
// quantities. The selb therefore chooses between:
//   - the signed word-wise result (used where the words differ), and
//   - the XSWD-extended *unsigned* word-wise result (used where the high
//     words are equal, per the equality mask).
// Feeding the signed result into XSWD mis-orders values whose low words
// differ in their top bit, e.g. 0x00000000_80000000 > 0x00000000_00000001.
def CGTr64compare:
    CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                        (XSWDv2i64 CLGTr64ugt.Fragment),
                        CGTr64eq.Fragment)>;
190
// Word-wise signed greater-than on vector operands.
def CGTv2i64sgt: CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

// Word-wise equality on vector operands.
def CGTv2i64eq: CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;
196
// v2i64 signed greater-than, built entirely from vector-operand fragments.
//
// Two corrections relative to the earlier form:
//   1. The XSWD operand is a vector fragment. The scalar fragment wraps
//      $rA/$rB in R64C -> VECREG copies and does not match the VECREG
//      operands used by the other selb sources.
//   2. The low-word tie-break uses the unsigned word-wise compare, since
//      only the high word of each doubleword is signed.
def CGTv2i64compare:
    CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                        (XSWDv2i64 CLGTv2i64ugt.Fragment),
                        CGTv2i64eq.Fragment)>;
201
// Signed greater-than fragments for i64 and v2i64.
multiclass CompareGreaterThan64 {
  // Scalar result is copied back to an i32 register; the vector result is
  // used as-is.
  def r64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
  def v2i64:
    CodeFrag<CGTv2i64compare.Fragment>;

  // Results expanded to a SELB mask by FSM:
  def r64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGTr64compare.Fragment),
                                    R32C))>;
  def v2i64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGTv2i64compare.Fragment),
                                    R32C))>;
}
213
// Instantiate the *signed* greater-than fragments and match i64 setgt.
//
// This must use CompareGreaterThan64. Instantiating the "Logical" multiclass
// here would make setgt (and setle, which is derived from I64GTr64) use the
// unsigned comparison sequence and would leave CompareGreaterThan64 unused.
// The v2i64 setgt pattern is intentionally left disabled.
defm I64GT: CompareGreaterThan64;

def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//                  I64GTv2i64.Fragment>;
219
// i64 setle, expressed as the negation of setgt (setcc and select forms):
def : I64SETCCNegCond <setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;
223
224//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
225// i64 setge/setlt:
226//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
227
// i64 signed greater-or-equal.
//
// a >= b holds when a > b or a == b as full 64-bit values, so the result is
// the OR of the 64-bit signed greater-than fragment and the 64-bit equality
// fragment. Both produce an all-ones/all-zeros value in the preferred
// (first) word slot, which is the slot read by the i32 copy and by FSM.
//
// The previous sequence OR-ed the *word-wise* sgt/eq results and then
// required both of the top two slots to be set (gb + cgti 0xb), i.e.
// "high word >= AND low word >=". That rejects valid cases where the high
// word is strictly greater but the low word is smaller.
def CGEr64compare:
    CodeFrag<(ORv4i32 CGTr64compare.Fragment, CEQr64compare.Fragment)>;
231
// v2i64 signed greater-or-equal: OR the word-wise sgt and eq results,
// gather, and require an exact all-four-slot match (0xf).
// NOTE(review): this appears to require every 32-bit slot to satisfy >= on
// its own, which is not the same as a per-doubleword 64-bit ordering --
// confirm the intended semantics before relying on it.
def CGEv2i64compare:
    CodeFrag<(CEQIv4i32
                (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                  CGTv2i64eq.Fragment)),
                0xf)>;
235
// Signed greater-or-equal fragments for i64 and v2i64.
multiclass CompareGreaterEqual64 {
  // Scalar result is copied back to an i32 register; the vector result is
  // used as-is.
  def r64:
    CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
  def v2i64:
    CodeFrag<CGEv2i64compare.Fragment>;

  // Results expanded to a SELB mask by FSM:
  def r64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),
                                    R32C))>;
  def v2i64mask:
    CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),
                                    R32C))>;
}
245
// Instantiate the signed greater-or-equal fragments and match setge for
// both the scalar and the vector operand types.
defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB),
          I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;
251
// i64 setlt, expressed as the negation of setge (setcc and select forms):
def : I64SETCCNegCond <setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;
255
256//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
257// v2i64, i64 add
258//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
259
// Carry-generate step: CGv4i32 applied to the two addends.
class v2i64_add_cg<dag lhs, dag rhs>: CodeFrag<(CGv4i32 lhs, rhs)>;

// Extended add of the two addends, with the carry vector "cg" shuffled
// through "cg_mask" (SHUFBv4i32 cg, cg, cg_mask) as the third operand.
class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs,
                        (SHUFBv4i32 cg, cg, cg_mask))>;

// Complete 64-bit add: v2i64_add_1 with the carry taken from v2i64_add_cg
// on the same addends.
class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs,
                v2i64_add_cg<lhs, rhs>.Fragment,
                cg_mask>;
268
// i64 add: move both scalars into vector registers, run the vector add
// sequence, and copy the result back to a 64-bit scalar register.
def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS
              v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG),
                        (v4i32 VECREG:$rCGmask)>.Fragment,
              R64C)>;

// v2i64 add: the vector add sequence applied directly.
def : Pat<(SPUadd64 (v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_add<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
279
280//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
281// v2i64, i64 subtraction
282//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
283
// Borrow-generate step: BGv4i32 applied to the two operands.
class v2i64_sub_bg<dag lhs, dag rhs>:
    CodeFrag<(BGv4i32 lhs, rhs)>;

// Extended subtract, with the borrow vector "bg" shuffled through "bg_mask"
// (SHUFBv4i32 bg, bg, bg_mask) as the third operand.
// NOTE(review): SFX is a "subtract from" form; the lhs/rhs order here
// presumably matches what the SPUsub64 lowering supplies -- confirm.
class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
    CodeFrag<(SFXv4i32 lhs, rhs,
                       (SHUFBv4i32 bg, bg, bg_mask))>;
288
// i64 subtract: move both scalars into vector registers, run the vector
// subtract sequence (borrow generated from the same two operands), and copy
// the result back to a 64-bit scalar register.
def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS
              v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG),
                        v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                     (COPY_TO_REGCLASS R64C:$rB, VECREG)
                                    >.Fragment,
                        (v4i32 VECREG:$rCGmask)>.Fragment,
              R64C)>;

// v2i64 subtract: the vector subtract sequence applied directly.
def : Pat<(SPUsub64 (v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_sub<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    v2i64_sub_bg<(v2i64 VECREG:$rA),
                                 (v2i64 VECREG:$rB)>.Fragment,
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
304
305//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
306// v2i64, i64 multiply
307//
308// Note: i64 multiply is simply the vector->scalar conversion of the
309// full-on v2i64 multiply, since the entire vector has to be manipulated
310// anyway.
311//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
312
// Operand-preparation helpers for the 64-bit multiply.
//
// The *hi64 / *lo64 helpers select between the operand and zero with selb,
// using a byte mask built by FSMBI (0x0f0f for "hi", 0xf0f0 for "lo").
// The a* and b* variants are structurally identical; the separate names
// only keep the multiply expressions below readable.

class v2i64_mul_ahi64<dag rA>:
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB>:
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_alo64<dag rA>:
    CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB>:
    CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// Quadword byte shifts (SHLQBYI) of the operands by 2 and by 4:

class v2i64_mul_ashlq2<dag rA>: CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>: CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB>: CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB>: CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;
336
// Sum of the partial products that feed the upper part of the 64-bit
// result: a nested chain of Av4i32 adds over seven MPYU/MPYH products of
// the masked and byte-shifted operands.
class v2i64_highprod<dag rA, dag rB>:
    CodeFrag<
      (Av4i32
        (Av4i32
          // a1 x b3
          (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,
                     v2i64_mul_ahi64<rA>.Fragment),
          // a0 x b3
          (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,
                     v2i64_mul_bshlq4<rB>.Fragment)),
        (Av4i32
          (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                     v2i64_mul_ashlq4<rA>.Fragment),
          (Av4i32
            (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                       v2i64_mul_bhi64<rB>.Fragment),
            (Av4i32
              (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                         v2i64_mul_bhi64<rB>.Fragment),
              (Av4i32
                (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                           v2i64_mul_bshlq2<rB>.Fragment),
                (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                           v2i64_mul_bshlq2<rB>.Fragment))))))>;
358
// Low-part partial products of the 64-bit multiply.

// MPYU of the low halves of both operands.
class v2i64_mul_a3_b3<dag rA, dag rB>:
    CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                        v2i64_mul_blo64<rB>.Fragment)>;

// MPYHHU of the low half of A with B shifted by 2 bytes; the product is
// shifted by 2 bytes and then masked against zero with FSMBI 0xc3c3.
class v2i64_mul_a2_b3<dag rA, dag rB>:
    CodeFrag<(SELBv4i32
                (SHLQBYIv4i32
                  (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                               v2i64_mul_bshlq2<rB>.Fragment),
                  0x2),
                (ILv4i32 0),
                (FSMBIv4i32 0xc3c3))>;

// Mirror image of v2i64_mul_a2_b3 with the roles of A and B exchanged.
class v2i64_mul_a3_b2<dag rA, dag rB>:
    CodeFrag<(SELBv4i32
                (SHLQBYIv4i32
                  (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                               v2i64_mul_ashlq2<rA>.Fragment),
                  0x2),
                (ILv4i32 0),
                (FSMBIv4i32 0xc3c3))>;
376
// Sum of the three low-part partial products, accumulated with two chained
// 64-bit adds: (a3_b3 + a2_b3) + a3_b2.
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
    v2i64_add<
      v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                v2i64_mul_a2_b3<rA, rB>.Fragment,
                rCGmask>.Fragment,
      v2i64_mul_a3_b2<rA, rB>.Fragment,
      rCGmask>;

// Full 64-bit multiply: the low sum plus the high-product sum, the latter
// masked against zero with FSMBI 0x0f0f before the final 64-bit add.
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
    v2i64_add<
      v2i64_lowsum<rA, rB, rCGmask>.Fragment,
      (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                 (ILv4i32 0),
                 (FSMBIv4i32 0x0f0f)),
      rCGmask>;
388
// i64 multiply: move both scalars into vector registers, run the vector
// multiply sequence, and copy the result back to a 64-bit scalar register.
def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS
              v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                        (COPY_TO_REGCLASS R64C:$rB, VECREG),
                        (v4i32 VECREG:$rCGmask)>.Fragment,
              R64C)>;

// v2i64 multiply: the vector multiply sequence applied directly.
def : Pat<(SPUmul64 (v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA),
                    (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;
398
399//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
400// f64 comparisons
401//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
402
// selb instruction definition for f64. Here the condition operand ($rC) is
// a 32-bit scalar register, and the instruction carries its own select
// pattern: (select $rC, $rB, $rA).
def SELBf64_cond:
   SELBInst<(outs R64FP:$rT),
            (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
            [(set R64FP:$rT, (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;
409