//====--- SPU64InstrInfo.td - Cell SPU 64-bit operations -*- tablegen -*--====//
//
// Cell SPU 64-bit operations
//
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// 64-bit comparisons:
//
// 1. The instruction sequences for vector versus scalar differ by a
//    constant. In the scalar case, we're only interested in the
//    top two 32-bit slots, whereas we're interested in an exact
//    all-four-slot match in the vector case.
//
// 2. There are no "immediate" forms, since loading 64-bit constants
//    could be a constant pool load.
//
// 3. i64 setcc results are i32, which are subsequently converted to an FSM
//    mask when used in a select pattern.
//
// 4. v2i64 setcc results are v4i32, which can be converted to an FSM mask
//    (TODO). [Note: this may be moot, since gb produces v4i32 or r32.]
//
// 5. The code sequences for r64 and v2i64 are probably overly conservative,
//    compared to the code that gcc produces.
//
// M00$E B!tes Kan be Pretty N@sTi!!!!! (apologies to Monty!)
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for i64.
// SELBr64_cond: selb over i64 (R64C) operands. The selection mask operand
// ($rC) is a vector register, produced by various forms of FSM. No selection
// pattern is attached here; the Pat<> definitions below use it explicitly.
def SELBr64_cond:
  SELBInst<(outs R64C:$rT),
           (ins R64C:$rA, R64C:$rB, VECREG:$rC),
           [/* no pattern */]>;

// The generic i64 select pattern. It assumes the comparison result lives in a
// 32-bit register holding a select mask pattern (i.e., a gather-bits result),
// which FSMr32 expands into the vector mask that SELBr64_cond consumes.
//
// The first select operand ($rIfSet) becomes the second SELBr64_cond operand,
// and the second select operand ($rIfClear) becomes the first.
def : Pat<(select R32C:$rCond, R64C:$rIfSet, R64C:$rIfClear),
          (SELBr64_cond R64C:$rIfClear,
                        R64C:$rIfSet,
                        (FSMr32 R32C:$rCond))>;

// Select on the negative condition: `cond` is the condition being matched and
// `compare` is the code fragment computing its complement, so the two select
// operands are handed to SELBr64_cond in matched order (no swap).
// `compare.Fragment` refers to the $rA/$rB operands bound by the source
// pattern.
class I64SELECTNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(select (i32 (cond R64C:$rA, R64C:$rB)),
              R64C:$rWhenCond,
              R64C:$rWhenNotCond),
      (SELBr64_cond R64C:$rWhenCond,
                    R64C:$rWhenNotCond,
                    (FSMr32 compare.Fragment))>;

// setcc on the negative condition: compute the complementary comparison via
// `compare`, then invert it by XOR-ing with -1.
class I64SETCCNegCond<PatFrag cond, CodeFrag compare>:
  Pat<(cond R64C:$rA, R64C:$rB),
      (XORIr32 compare.Fragment, -1)>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// The i64 seteq fragment: performs the scalar->vector conversion of both
// operands, a word-wise CEQ, gathers the per-word results with GB and tests
// the gathered value against 0xb with CGTI.
def CEQr64compare:
  CodeFrag<(CGTIv4i32
             (GBv4i32
               (CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                         (COPY_TO_REGCLASS R64C:$rB, VECREG))),
             0xb)>;

// The v2i64 seteq fragment: word-wise CEQ on the vector operands, GB to gather
// the per-word results, then CEQI against 0xf.
def CEQv2i64compare:
  CodeFrag<(CEQIv4i32
             (GBv4i32 (CEQv4i32 VECREG:$rA, VECREG:$rB)),
             0xf)>;

// i64 seteq (equality): the setcc result is i32, which is converted to a
// vector FSM mask when used in a select pattern.
// v2i64 seteq (equality): the setcc result is v4i32
multiclass CompareEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<(i32 (COPY_TO_REGCLASS CEQv2i64compare.Fragment, R32C))>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CEQr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CEQv2i64compare.Fragment), R32C))>;
}

defm I64EQ: CompareEqual64;

def : Pat<(seteq R64C:$rA, R64C:$rB), I64EQr64.Fragment>;
def : Pat<(seteq (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)), I64EQv2i64.Fragment>;

// i64 setne (negation of seteq):
def : I64SETCCNegCond<setne, I64EQr64>;
def : I64SELECTNegCond<setne, I64EQr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setugt/setule:
//
// A 64-bit unsigned "greater than" is assembled from 32-bit word compares:
// if the high words differ, the high-word compare decides; if they are equal,
// the low-word compare decides. XSWD replicates the low-word compare result
// across the doubleword, and SELB (masked by the word-equality result) picks
// between the two.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CLGTr64ugt:
  CodeFrag<(CLGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                      (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CLGTr64eq:
  CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                     (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CLGTr64compare:
  CodeFrag<(SELBv2i64 CLGTr64ugt.Fragment,
                      (XSWDv2i64 CLGTr64ugt.Fragment),
                      CLGTr64eq.Fragment)>;

def CLGTv2i64ugt:
  CodeFrag<(CLGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CLGTv2i64eq:
  CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// Fixed: the XSWD operand previously referenced the scalar fragment
// (CLGTr64ugt), which binds R64C operands; the vector form must use the
// v2i64 fragment throughout.
def CLGTv2i64compare:
  CodeFrag<(SELBv2i64 CLGTv2i64ugt.Fragment,
                      (XSWDv2i64 CLGTv2i64ugt.Fragment),
                      CLGTv2i64eq.Fragment)>;

multiclass CompareLogicalGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CLGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CLGTv2i64compare.Fragment), R32C))>;
}

defm I64LGT: CompareLogicalGreaterThan64;

def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//                  I64LGTv2i64.Fragment>;

// i64 setule (negation of setugt):
def : I64SETCCNegCond<setule, I64LGTr64>;
def : I64SELECTNegCond<setule, I64LGTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setuge/setult:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Fixed: a >= b is (a > b) OR (a == b) on the full 64-bit value. The previous
// sequence OR-ed the per-word ugt/eq results and required BOTH the high and
// low words to satisfy ">=", which rejects cases such as
// a = 0x00000001_00000000, b = 0x00000000_00000001 (high word greater, low
// word smaller). Both fragments below carry their result in the preferred
// (first) word slot, which is what the R32C copy consumes.
def CLGEr64compare:
  CodeFrag<(ORv4i32 CLGTr64compare.Fragment, CEQr64compare.Fragment)>;

// NOTE(review): this vector form still demands that every 32-bit word is
// individually ">=" and collapses the answer to a single value via GB/CEQI;
// it looks like it has the same high/low-word problem as the old scalar
// sequence -- confirm the intended v2i64 setuge semantics before relying on it.
def CLGEv2i64compare:
  CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CLGTv2i64ugt.Fragment,
                                        CLGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareLogicalGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CLGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CLGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CLGEr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CLGEv2i64compare.Fragment),R32C))>;
}

defm I64LGE: CompareLogicalGreaterEqual64;

def : Pat<(setuge R64C:$rA, R64C:$rB), I64LGEr64.Fragment>;
def : Pat<(v2i64 (setuge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64LGEv2i64.Fragment>;


// i64 setult (negation of setuge):
def : I64SETCCNegCond<setult, I64LGEr64>;
def : I64SELECTNegCond<setult, I64LGEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setgt/setle:
//
// Same construction as the unsigned case, except that only the HIGH word is
// compared as a signed quantity. When the high words are equal the low words
// are plain magnitude bits and must be compared unsigned.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

def CGTr64sgt:
  CodeFrag<(CGTv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                     (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

def CGTr64eq:
  CodeFrag<(CEQv4i32 (COPY_TO_REGCLASS R64C:$rA, VECREG),
                     (COPY_TO_REGCLASS R64C:$rB, VECREG))>;

// Fixed: the low-word result fed through XSWD is now the unsigned compare
// (CLGTr64ugt). Using the signed compare there mis-orders values whose low
// words differ in bit 31, e.g. 0x00000000_80000000 vs 0x00000000_00000001.
def CGTr64compare:
  CodeFrag<(SELBv2i64 CGTr64sgt.Fragment,
                      (XSWDv2i64 CLGTr64ugt.Fragment),
                      CGTr64eq.Fragment)>;

def CGTv2i64sgt:
  CodeFrag<(CGTv4i32 VECREG:$rA, VECREG:$rB)>;

def CGTv2i64eq:
  CodeFrag<(CEQv4i32 VECREG:$rA, VECREG:$rB)>;

// Fixed: previously referenced the scalar fragment CGTr64sgt inside XSWD
// (copy/paste from the r64 form). Uses the v2i64 unsigned low-word compare,
// for the same reason as CGTr64compare.
def CGTv2i64compare:
  CodeFrag<(SELBv2i64 CGTv2i64sgt.Fragment,
                      (XSWDv2i64 CLGTv2i64ugt.Fragment),
                      CGTv2i64eq.Fragment)>;

multiclass CompareGreaterThan64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGTr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGTv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CGTr64compare.Fragment), R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS
                               (FSMv4i32 CGTv2i64compare.Fragment), R32C))>;
}

// Fixed: this previously instantiated CompareLogicalGreaterThan64, so the
// signed setgt/setle patterns silently used the unsigned comparison and
// CompareGreaterThan64 was never used.
defm I64GT: CompareGreaterThan64;

def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
//                  I64GTv2i64.Fragment>;

// i64 setle (negation of setgt):
def : I64SETCCNegCond<setle, I64GTr64>;
def : I64SELECTNegCond<setle, I64GTr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// i64 setge/setlt:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Fixed: a >= b is (a > b) OR (a == b) on the full 64-bit value; see
// CLGEr64compare for why the per-word OR/GB/CGTI sequence was incorrect.
def CGEr64compare:
  CodeFrag<(ORv4i32 CGTr64compare.Fragment, CEQr64compare.Fragment)>;

// NOTE(review): same caveat as CLGEv2i64compare -- every word must be
// individually ">=" (and the low words are compared signed); confirm the
// intended v2i64 setge semantics.
def CGEv2i64compare:
  CodeFrag<(CEQIv4i32 (GBv4i32 (ORv4i32 CGTv2i64sgt.Fragment,
                                        CGTv2i64eq.Fragment)), 0xf)>;

multiclass CompareGreaterEqual64 {
  // Plain old comparison, converts back to i32 scalar
  def r64: CodeFrag<(i32 (COPY_TO_REGCLASS CGEr64compare.Fragment, R32C))>;
  def v2i64: CodeFrag<CGEv2i64compare.Fragment>;

  // SELB mask from FSM:
  def r64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEr64compare.Fragment),R32C))>;
  def v2i64mask: CodeFrag<(i32 (COPY_TO_REGCLASS (FSMv4i32 CGEv2i64compare.Fragment),R32C))>;
}

defm I64GE: CompareGreaterEqual64;

def : Pat<(setge R64C:$rA, R64C:$rB), I64GEr64.Fragment>;
def : Pat<(v2i64 (setge (v2i64 VECREG:$rA), (v2i64 VECREG:$rB))),
          I64GEv2i64.Fragment>;

// i64 setlt (negation of setge):
def : I64SETCCNegCond<setlt, I64GEr64>;
def : I64SELECTNegCond<setlt, I64GEr64>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 add
//
// The add is built from a carry-generate (CG), a shuffle of the carries using
// the caller-supplied mask (cg_mask), and an extended add (ADDX).
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_add_cg<dag lhs, dag rhs>:
    CodeFrag<(CGv4i32 lhs, rhs)>;

class v2i64_add_1<dag lhs, dag rhs, dag cg, dag cg_mask>:
    CodeFrag<(ADDXv4i32 lhs, rhs, (SHUFBv4i32 cg, cg, cg_mask))>;

class v2i64_add<dag lhs, dag rhs, dag cg_mask>:
    v2i64_add_1<lhs, rhs, v2i64_add_cg<lhs, rhs>.Fragment, cg_mask>;

def : Pat<(SPUadd64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (COPY_TO_REGCLASS v2i64_add<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                       (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                       (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUadd64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_add<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 subtraction
//
// Mirrors the add: borrow-generate (BG), shuffle of the borrows using the
// caller-supplied mask, then an extended subtract-from (SFX).
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

class v2i64_sub_bg<dag lhs, dag rhs>: CodeFrag<(BGv4i32 lhs, rhs)>;

class v2i64_sub<dag lhs, dag rhs, dag bg, dag bg_mask>:
  CodeFrag<(SFXv4i32 lhs, rhs, (SHUFBv4i32 bg, bg, bg_mask))>;

def : Pat<(SPUsub64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
           (COPY_TO_REGCLASS
               v2i64_sub<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                         (COPY_TO_REGCLASS R64C:$rB, VECREG),
                         v2i64_sub_bg<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG)>.Fragment,
                         (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUsub64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
           v2i64_sub<(v2i64 VECREG:$rA),
                     (v2i64 VECREG:$rB),
                     v2i64_sub_bg<(v2i64 VECREG:$rA),
                                  (v2i64 VECREG:$rB)>.Fragment,
                     (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v2i64, i64 multiply
//
// Note: i64 multiply is simply the vector->scalar conversion of the
// full-on v2i64 multiply, since the entire vector has to be manipulated
// anyway.
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Operand slices: each selects between the operand and zero under an FSMBI
// byte mask (0x0f0f for the "hi64" forms, 0xf0f0 for the "lo64" forms).
class v2i64_mul_ahi64<dag rA> :
  CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

class v2i64_mul_bhi64<dag rB> :
  CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0x0f0f))>;

// Parameter renamed from rB to rA to match how the class is instantiated
// (template arguments are positional, so users are unaffected).
class v2i64_mul_alo64<dag rA> :
  CodeFrag<(SELBv4i32 rA, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

class v2i64_mul_blo64<dag rB> :
  CodeFrag<(SELBv4i32 rB, (ILv4i32 0), (FSMBIv4i32 0xf0f0))>;

// Quadword byte shifts (by 2 and by 4 bytes) of each operand.
class v2i64_mul_ashlq2<dag rA>:
  CodeFrag<(SHLQBYIv4i32 rA, 0x2)>;

class v2i64_mul_ashlq4<dag rA>:
  CodeFrag<(SHLQBYIv4i32 rA, 0x4)>;

class v2i64_mul_bshlq2<dag rB> :
  CodeFrag<(SHLQBYIv4i32 rB, 0x2)>;

class v2i64_mul_bshlq4<dag rB> :
  CodeFrag<(SHLQBYIv4i32 rB, 0x4)>;

// Sum of the partial products that make up the high part of the result.
class v2i64_highprod<dag rA, dag rB>:
  CodeFrag<(Av4i32
             (Av4i32
               (MPYUv4i32 v2i64_mul_bshlq4<rB>.Fragment,     // a1 x b3
                          v2i64_mul_ahi64<rA>.Fragment),
               (MPYHv4i32 v2i64_mul_ahi64<rA>.Fragment,      // a0 x b3
                          v2i64_mul_bshlq4<rB>.Fragment)),
             (Av4i32
               (MPYHv4i32 v2i64_mul_bhi64<rB>.Fragment,
                          v2i64_mul_ashlq4<rA>.Fragment),
               (Av4i32
                 (MPYHv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                            v2i64_mul_bhi64<rB>.Fragment),
                 (Av4i32
                   (MPYUv4i32 v2i64_mul_ashlq4<rA>.Fragment,
                              v2i64_mul_bhi64<rB>.Fragment),
                   (Av4i32
                     (MPYHv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                v2i64_mul_bshlq2<rB>.Fragment),
                     (MPYUv4i32 v2i64_mul_ashlq2<rA>.Fragment,
                                v2i64_mul_bshlq2<rB>.Fragment))))))>;

// Partial products that make up the low part of the result.
class v2i64_mul_a3_b3<dag rA, dag rB>:
  CodeFrag<(MPYUv4i32 v2i64_mul_alo64<rA>.Fragment,
                      v2i64_mul_blo64<rB>.Fragment)>;

class v2i64_mul_a2_b3<dag rA, dag rB>:
  CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                        (MPYHHUv4i32 v2i64_mul_alo64<rA>.Fragment,
                                     v2i64_mul_bshlq2<rB>.Fragment), 0x2),
                      (ILv4i32 0),
                      (FSMBIv4i32 0xc3c3))>;

class v2i64_mul_a3_b2<dag rA, dag rB>:
  CodeFrag<(SELBv4i32 (SHLQBYIv4i32
                        (MPYHHUv4i32 v2i64_mul_blo64<rB>.Fragment,
                                     v2i64_mul_ashlq2<rA>.Fragment), 0x2),
                      (ILv4i32 0),
                      (FSMBIv4i32 0xc3c3))>;

// Low partial products accumulated with the 64-bit add sequence (v2i64_add),
// so carries propagate via rCGmask.
class v2i64_lowsum<dag rA, dag rB, dag rCGmask>:
  v2i64_add<v2i64_add<v2i64_mul_a3_b3<rA, rB>.Fragment,
                      v2i64_mul_a2_b3<rA, rB>.Fragment, rCGmask>.Fragment,
            v2i64_mul_a3_b2<rA, rB>.Fragment, rCGmask>;

// Full multiply: low sum plus the masked high product.
class v2i64_mul<dag rA, dag rB, dag rCGmask>:
  v2i64_add<v2i64_lowsum<rA, rB, rCGmask>.Fragment,
            (SELBv4i32 v2i64_highprod<rA, rB>.Fragment,
                       (ILv4i32 0),
                       (FSMBIv4i32 0x0f0f)),
            rCGmask>;

def : Pat<(SPUmul64 R64C:$rA, R64C:$rB, (v4i32 VECREG:$rCGmask)),
          (COPY_TO_REGCLASS v2i64_mul<(COPY_TO_REGCLASS R64C:$rA, VECREG),
                                      (COPY_TO_REGCLASS R64C:$rB, VECREG),
                                      (v4i32 VECREG:$rCGmask)>.Fragment, R64C)>;

def : Pat<(SPUmul64 (v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)),
          v2i64_mul<(v2i64 VECREG:$rA), (v2i64 VECREG:$rB),
                    (v4i32 VECREG:$rCGmask)>.Fragment>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f64 comparisons
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// selb instruction definition for f64.
// SELBf64_cond: selb over f64 (R64FP) operands. Here the condition operand
// ($rC) is a 32-bit R32C register, and the instruction carries its own
// selection pattern: (select $rC, $rB, $rA) -- i.e. the first select operand
// is $rB and the second is $rA.
def SELBf64_cond:
  SELBInst<(outs R64FP:$rT),
           (ins R64FP:$rA, R64FP:$rB, R32C:$rC),
           [(set R64FP:$rT,
                 (select R32C:$rC, R64FP:$rB, R64FP:$rA))]>;