1 /*Copyright (c) 2013, Xiph.Org Foundation and contributors.
2 
3   All rights reserved.
4 
5   Redistribution and use in source and binary forms, with or without
6    modification, are permitted provided that the following conditions are met:
7 
8     * Redistributions of source code must retain the above copyright notice,
9        this list of conditions and the following disclaimer.
10     * Redistributions in binary form must reproduce the above copyright notice,
11        this list of conditions and the following disclaimer in the
12        documentation and/or other materials provided with the distribution.
13 
14   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
15   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
18   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
20   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
21   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
22   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
23   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24   POSSIBILITY OF SUCH DAMAGE.*/
25 
26 #ifndef KISS_FFT_MIPSR1_H
27 #define KISS_FFT_MIPSR1_H
28 
29 #if !defined(KISS_FFT_GUTS_H)
30 #error "This file should only be included from _kiss_fft_guts.h"
31 #endif
32 
33 #ifdef FIXED_POINT
34 
/* Generic two-product helpers written in terms of S_MUL:
     S_MUL_ADD(a,b,c,d) = S_MUL(a,b) + S_MUL(c,d)
     S_MUL_SUB(a,b,c,d) = S_MUL(a,b) - S_MUL(c,d)
   Both names are #undef'd further down in this FIXED_POINT section and
   redefined as inline functions built on MIPS DSP accumulator
   instructions, so these macro forms are not the ones in effect after
   this header has been processed. */
#define S_MUL_ADD(a, b, c, d) (S_MUL(a,b)+S_MUL(c,d))
#define S_MUL_SUB(a, b, c, d) (S_MUL(a,b)-S_MUL(c,d))
37 
38 #undef S_MUL_ADD
S_MUL_ADD(int a,int b,int c,int d)39 static inline int S_MUL_ADD(int a, int b, int c, int d) {
40     int m;
41     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
42     asm volatile("madd $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
43     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
44     return m;
45 }
46 
47 #undef S_MUL_SUB
S_MUL_SUB(int a,int b,int c,int d)48 static inline int S_MUL_SUB(int a, int b, int c, int d) {
49     int m;
50     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a), "r" ((int)b));
51     asm volatile("msub $ac1, %0, %1" : : "r" ((int)c), "r" ((int)d));
52     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m): "i" (15));
53     return m;
54 }
55 
56 #undef C_MUL
57 #   define C_MUL(m,a,b) (m=C_MUL_fun(a,b))
C_MUL_fun(kiss_fft_cpx a,kiss_twiddle_cpx b)58 static inline kiss_fft_cpx C_MUL_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
59     kiss_fft_cpx m;
60 
61     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
62     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
63     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
64     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
65     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
66     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
67 
68     return m;
69 }
70 #undef C_MULC
71 #   define C_MULC(m,a,b) (m=C_MULC_fun(a,b))
C_MULC_fun(kiss_fft_cpx a,kiss_twiddle_cpx b)72 static inline kiss_fft_cpx C_MULC_fun(kiss_fft_cpx a, kiss_twiddle_cpx b) {
73     kiss_fft_cpx m;
74 
75     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.r));
76     asm volatile("madd $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.i));
77     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.r): "i" (15));
78     asm volatile("MULT $ac1, %0, %1" : : "r" ((int)a.i), "r" ((int)b.r));
79     asm volatile("msub $ac1, %0, %1" : : "r" ((int)a.r), "r" ((int)b.i));
80     asm volatile("EXTR.W %0,$ac1, %1" : "=r" (m.i): "i" (15));
81 
82     return m;
83 }
84 
85 #endif /* FIXED_POINT */
86 
87 #define OVERRIDE_kf_bfly5
kf_bfly5(kiss_fft_cpx * Fout,const size_t fstride,const kiss_fft_state * st,int m,int N,int mm)88 static void kf_bfly5(
89                      kiss_fft_cpx * Fout,
90                      const size_t fstride,
91                      const kiss_fft_state *st,
92                      int m,
93                      int N,
94                      int mm
95                     )
96 {
97    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
98    int i, u;
99    kiss_fft_cpx scratch[13];
100 
101    const kiss_twiddle_cpx *tw;
102    kiss_twiddle_cpx ya,yb;
103    kiss_fft_cpx * Fout_beg = Fout;
104 
105 #ifdef FIXED_POINT
106    ya.r = 10126;
107    ya.i = -31164;
108    yb.r = -26510;
109    yb.i = -19261;
110 #else
111    ya = st->twiddles[fstride*m];
112    yb = st->twiddles[fstride*2*m];
113 #endif
114 
115    tw=st->twiddles;
116 
117    for (i=0;i<N;i++)
118    {
119       Fout = Fout_beg + i*mm;
120       Fout0=Fout;
121       Fout1=Fout0+m;
122       Fout2=Fout0+2*m;
123       Fout3=Fout0+3*m;
124       Fout4=Fout0+4*m;
125 
126       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
127       for ( u=0; u<m; ++u ) {
128          scratch[0] = *Fout0;
129 
130 
131          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
132          C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
133          C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
134          C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
135 
136          C_ADD( scratch[7],scratch[1],scratch[4]);
137          C_SUB( scratch[10],scratch[1],scratch[4]);
138          C_ADD( scratch[8],scratch[2],scratch[3]);
139          C_SUB( scratch[9],scratch[2],scratch[3]);
140 
141          Fout0->r += scratch[7].r + scratch[8].r;
142          Fout0->i += scratch[7].i + scratch[8].i;
143          scratch[5].r = scratch[0].r + S_MUL_ADD(scratch[7].r,ya.r,scratch[8].r,yb.r);
144          scratch[5].i = scratch[0].i + S_MUL_ADD(scratch[7].i,ya.r,scratch[8].i,yb.r);
145 
146          scratch[6].r =  S_MUL_ADD(scratch[10].i,ya.i,scratch[9].i,yb.i);
147          scratch[6].i =  -S_MUL_ADD(scratch[10].r,ya.i,scratch[9].r,yb.i);
148 
149          C_SUB(*Fout1,scratch[5],scratch[6]);
150          C_ADD(*Fout4,scratch[5],scratch[6]);
151 
152          scratch[11].r = scratch[0].r + S_MUL_ADD(scratch[7].r,yb.r,scratch[8].r,ya.r);
153          scratch[11].i = scratch[0].i + S_MUL_ADD(scratch[7].i,yb.r,scratch[8].i,ya.r);
154 
155          scratch[12].r =  S_MUL_SUB(scratch[9].i,ya.i,scratch[10].i,yb.i);
156          scratch[12].i =  S_MUL_SUB(scratch[10].r,yb.i,scratch[9].r,ya.i);
157 
158          C_ADD(*Fout2,scratch[11],scratch[12]);
159          C_SUB(*Fout3,scratch[11],scratch[12]);
160 
161          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
162       }
163    }
164 }
165 
166 
167 #endif /* KISS_FFT_MIPSR1_H */
168