//======--- SPUMathInst.td - Cell SPU math operations -*- tablegen -*---======//
//
//                     Cell SPU math operations
//
// This target description file contains instruction sequences for various
// math operations, such as vector multiplies, i32 multiply, etc., for the
// SPU's i32, i16, i8 and corresponding vector types.
//
// Any resemblance to libsimdmath or the Cell SDK simdmath library is
// purely and completely coincidental.
//===----------------------------------------------------------------------===//

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v16i8 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selects (mul v16i8:$rA, v16i8:$rB) as the OR (ORv4i32) of two partial
// results, each built from 16-bit multiplies (MPYv8i16):
//
//   1. ANDv4i32 with (ILAv4i32 0x0000ffff) of a SELBv4i32 that merges
//        - MPYv8i16 of the unmodified operands, and
//        - SHLHIv8i16-by-8 of MPYv8i16 of both operands after ROTMAHIv8i16 8,
//      under the mask (FSMBIv8i16 0x2222).
//
//   2. SHLIv4i32-by-16 of a SELBv4i32 that merges
//        - MPYv8i16 of both operands after ROTMAIv4i32_i32 16, and
//        - SHLHIv8i16-by-8 of MPYv8i16 of both operands after
//          ROTMAIv4i32_i32 8,
//      under the same (FSMBIv8i16 0x2222) mask.
//
// NOTE(review): presumably term 1 supplies the two byte products of the low
// halfword of each 32-bit word and term 2 those of the high halfword, with
// the 0x2222 mask picking the byte lane that takes the shifted product --
// confirm against the SPU ISA before modifying any shift amount or mask.
def : Pat<(mul (v16i8 VECREG:$rA), (v16i8 VECREG:$rB)),
          (ORv4i32
           (ANDv4i32
            (SELBv4i32 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAHIv8i16 VECREG:$rA, 8),
                                             (ROTMAHIv8i16 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)),
            (ILAv4i32 0x0000ffff)),
           (SHLIv4i32
            (SELBv4i32 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 16),
                                 (ROTMAIv4i32_i32 VECREG:$rB, 16)),
                       (SHLHIv8i16 (MPYv8i16 (ROTMAIv4i32_i32 VECREG:$rA, 8),
                                             (ROTMAIv4i32_i32 VECREG:$rB, 8)), 8),
                       (FSMBIv8i16 0x2222)), 16))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v8i16 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selects (mul v8i16:$rA, v8i16:$rB) as a SELBv8i16 that merges
//   - MPYv8i16 of the two operands, and
//   - MPYHHv8i16 of the two operands, shifted left by 16 with SHLIv4i32,
// under the mask (FSMBIv8i16 0xcccc).
//
// NOTE(review): presumably MPYv8i16 yields the product of the low halfword
// of each word, MPYHHv8i16 that of the high halfword, and 0xcccc selects the
// bytes that take the shifted high-halfword product -- confirm against the
// SPU ISA.
def : Pat<(mul (v8i16 VECREG:$rA), (v8i16 VECREG:$rB)),
          (SELBv8i16 (MPYv8i16 VECREG:$rA, VECREG:$rB),
                     (SHLIv4i32 (MPYHHv8i16 VECREG:$rA, VECREG:$rB), 16),
                     (FSMBIv8i16 0xcccc))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// v4i32, i32 multiply instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Selects (mul v4i32:$rA, v4i32:$rB) as the sum (Av4i32) of three partial
// products:
//     MPYHv4i32(rA, rB) + MPYHv4i32(rB, rA) + MPYUv4i32(rA, rB)
//
// NOTE(review): presumably the two MPYH terms are the cross products
// (high half of one operand times low half of the other, shifted into the
// upper halfword) and MPYU is the unsigned low-half product -- confirm
// against the SPU ISA.
def MPYv4i32:
  Pat<(mul (v4i32 VECREG:$rA), (v4i32 VECREG:$rB)),
      (Av4i32
        (v4i32 (Av4i32 (v4i32 (MPYHv4i32 VECREG:$rA, VECREG:$rB)),
                       (v4i32 (MPYHv4i32 VECREG:$rB, VECREG:$rA)))),
        (v4i32 (MPYUv4i32 VECREG:$rA, VECREG:$rB)))>;

// Scalar i32 multiply on R32C registers. Selects (mul $rA, $rB) as the sum
// (Ar32) of three partial products:
//     MPYHr32(rA, rB) + MPYHr32(rB, rA) + MPYUr32(rA, rB)
//
// NOTE(review): presumably the MPYH terms are the two cross products and
// MPYU the unsigned low-half product -- confirm against the SPU ISA.
def MPYi32:
  Pat<(mul R32C:$rA, R32C:$rB),
      (Ar32
        (Ar32 (MPYHr32 R32C:$rA, R32C:$rB),
              (MPYHr32 R32C:$rB, R32C:$rA)),
        (MPYUr32 R32C:$rA, R32C:$rB))>;

//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~
// f32, v4f32 divide instruction sequence:
//-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~-~

// Reciprocal estimate and interpolation: FRESTf32 of the divisor $rB, fed
// together with $rB itself into FIf32. The fragment refers to the operand
// by the name $rB, so any pattern that embeds it must bind an R32FP:$rB.
def Interpf32: CodeFrag<(FIf32 R32FP:$rB, (FRESTf32 R32FP:$rB))>;
// Division estimate: the dividend $rA multiplied (FMf32) by the interpolated
// reciprocal fragment Interpf32. Relies on R32FP:$rA and R32FP:$rB being
// bound by the enclosing pattern.
def DivEstf32: CodeFrag<(FMf32 R32FP:$rA, Interpf32.Fragment)>;
// Newton-Raphson iteration: FMAf32 of
//   - (FNMSf32 DivEstf32, $rB, $rA),
//   - the interpolated reciprocal Interpf32, and
//   - the division estimate DivEstf32.
// NOTE(review): presumably FNMS yields the residual rA - estimate * rB and
// FMA folds residual * reciprocal back into the estimate -- confirm operand
// order against the SPU ISA.
def NRaphf32: CodeFrag<(FMAf32 (FNMSf32 DivEstf32.Fragment, R32FP:$rB, R32FP:$rA),
                               Interpf32.Fragment,
                               DivEstf32.Fragment)>;
// Epsilon addition: AIf32 adds the immediate 1 to the Newton-Raphson result.
// NOTE(review): presumably this is an integer add on the float's bit
// pattern, i.e. a step to the adjacent representable value -- confirm.
def Epsilonf32: CodeFrag<(AIf32 NRaphf32.Fragment, 1)>;

// Selects (fdiv f32:$rA, f32:$rB). SELBf32_cond picks between the
// Newton-Raphson quotient (NRaphf32) and its epsilon-adjusted variant
// (Epsilonf32); the selector is CGTIf32 of (FNMSf32 $rB, Epsilonf32, $rA)
// against the immediate -1.
// NOTE(review): presumably the FNMS term is the residual
// rA - rB * Epsilonf32, and the adjusted quotient is taken when that
// residual is non-negative -- confirm against the SPU ISA.
def : Pat<(fdiv R32FP:$rA, R32FP:$rB),
          (SELBf32_cond NRaphf32.Fragment,
                        Epsilonf32.Fragment,
                        (CGTIf32 (FNMSf32 R32FP:$rB, Epsilonf32.Fragment, R32FP:$rA), -1))>;

// Reciprocal estimate and interpolation (v4f32): FRESTv4f32 of the divisor
// $rB, fed together with $rB itself into FIv4f32. Any pattern that embeds
// this fragment must bind a VECREG:$rB.
def Interpv4f32: CodeFrag<(FIv4f32 (v4f32 VECREG:$rB), (FRESTv4f32 (v4f32 VECREG:$rB)))>;
// Division estimate (v4f32): the dividend $rA multiplied (FMv4f32) by the
// interpolated reciprocal fragment Interpv4f32. Relies on VECREG:$rA and
// VECREG:$rB being bound by the enclosing pattern.
def DivEstv4f32: CodeFrag<(FMv4f32 (v4f32 VECREG:$rA), Interpv4f32.Fragment)>;
// Newton-Raphson iteration (v4f32): FMAv4f32 of
//   - (FNMSv4f32 DivEstv4f32, $rB, $rA),
//   - the interpolated reciprocal Interpv4f32, and
//   - the division estimate DivEstv4f32.
// NOTE(review): presumably FNMS yields the residual rA - estimate * rB and
// FMA folds residual * reciprocal back into the estimate -- confirm operand
// order against the SPU ISA.
def NRaphv4f32: CodeFrag<(FMAv4f32 (FNMSv4f32 DivEstv4f32.Fragment,
                                              (v4f32 VECREG:$rB),
                                              (v4f32 VECREG:$rA)),
                                   Interpv4f32.Fragment,
                                   DivEstv4f32.Fragment)>;
// Epsilon addition (v4f32): AIv4f32 adds the immediate 1 to the
// Newton-Raphson result.
// NOTE(review): presumably this is an integer add on each element's bit
// pattern, i.e. a step to the adjacent representable value -- confirm.
def Epsilonv4f32: CodeFrag<(AIv4f32 NRaphv4f32.Fragment, 1)>;

// Selects (fdiv v4f32:$rA, v4f32:$rB). SELBv4f32_cond picks between the
// Newton-Raphson quotient (NRaphv4f32) and its epsilon-adjusted variant
// (Epsilonv4f32); the selector is CGTIv4f32 of
// (FNMSv4f32 $rB, Epsilonv4f32, $rA) against the immediate -1.
// NOTE(review): presumably the FNMS term is the per-element residual
// rA - rB * Epsilonv4f32, and the adjusted quotient is taken where that
// residual is non-negative -- confirm against the SPU ISA.
def : Pat<(fdiv (v4f32 VECREG:$rA), (v4f32 VECREG:$rB)),
          (SELBv4f32_cond NRaphv4f32.Fragment,
                          Epsilonv4f32.Fragment,
                          (CGTIv4f32 (FNMSv4f32 (v4f32 VECREG:$rB),
                                                Epsilonv4f32.Fragment,
                                                (v4f32 VECREG:$rA)), -1))>;
98