1 /*
2  * Copyright (c) 2008-2016 Stefan Krah. All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 
29 #include "mpdecimal.h"
30 #include <stdio.h>
31 #include <assert.h>
32 #include "bits.h"
33 #include "numbertheory.h"
34 #include "umodarith.h"
35 #include "difradix2.h"
36 
37 
38 /* Bignum: The actual transform routine (decimation in frequency). */
39 
40 
41 /*
42  * Generate index pairs (x, bitreverse(x)) and carry out the permutation.
43  * n must be a power of two.
44  * Algorithm due to Brent/Lehmann, see Joerg Arndt, "Matters Computational",
45  * Chapter 1.14.4. [http://www.jjj.de/fxt/]
46  */
47 static inline void
bitreverse_permute(mpd_uint_t a[],mpd_size_t n)48 bitreverse_permute(mpd_uint_t a[], mpd_size_t n)
49 {
50     mpd_size_t x = 0;
51     mpd_size_t r = 0;
52     mpd_uint_t t;
53 
54     do { /* Invariant: r = bitreverse(x) */
55         if (r > x) {
56             t = a[x];
57             a[x] = a[r];
58             a[r] = t;
59         }
60         /* Flip trailing consecutive 1 bits and the first zero bit
61          * that absorbs a possible carry. */
62         x += 1;
63         /* Mirror the operation on r: Flip n_trailing_zeros(x)+1
64            high bits of r. */
65         r ^= (n - (n >> (mpd_bsf(x)+1)));
66         /* The loop invariant is preserved. */
67     } while (x < n);
68 }
69 
70 
71 /* Fast Number Theoretic Transform, decimation in frequency. */
72 void
fnt_dif2(mpd_uint_t a[],mpd_size_t n,struct fnt_params * tparams)73 fnt_dif2(mpd_uint_t a[], mpd_size_t n, struct fnt_params *tparams)
74 {
75     mpd_uint_t *wtable = tparams->wtable;
76     mpd_uint_t umod;
77 #ifdef PPRO
78     double dmod;
79     uint32_t dinvmod[3];
80 #endif
81     mpd_uint_t u0, u1, v0, v1;
82     mpd_uint_t w, w0, w1, wstep;
83     mpd_size_t m, mhalf;
84     mpd_size_t j, r;
85 
86 
87     assert(ispower2(n));
88     assert(n >= 4);
89 
90     SETMODULUS(tparams->modnum);
91 
92     /* m == n */
93     mhalf = n / 2;
94     for (j = 0; j < mhalf; j += 2) {
95 
96         w0 = wtable[j];
97         w1 = wtable[j+1];
98 
99         u0 = a[j];
100         v0 = a[j+mhalf];
101 
102         u1 = a[j+1];
103         v1 = a[j+1+mhalf];
104 
105         a[j] = addmod(u0, v0, umod);
106         v0 = submod(u0, v0, umod);
107 
108         a[j+1] = addmod(u1, v1, umod);
109         v1 = submod(u1, v1, umod);
110 
111         MULMOD2(&v0, w0, &v1, w1);
112 
113         a[j+mhalf] = v0;
114         a[j+1+mhalf] = v1;
115 
116     }
117 
118     wstep = 2;
119     for (m = n/2; m >= 2; m>>=1, wstep<<=1) {
120 
121         mhalf = m / 2;
122 
123         /* j == 0 */
124         for (r = 0; r < n; r += 2*m) {
125 
126             u0 = a[r];
127             v0 = a[r+mhalf];
128 
129             u1 = a[m+r];
130             v1 = a[m+r+mhalf];
131 
132             a[r] = addmod(u0, v0, umod);
133             v0 = submod(u0, v0, umod);
134 
135             a[m+r] = addmod(u1, v1, umod);
136             v1 = submod(u1, v1, umod);
137 
138             a[r+mhalf] = v0;
139             a[m+r+mhalf] = v1;
140         }
141 
142         for (j = 1; j < mhalf; j++) {
143 
144             w = wtable[j*wstep];
145 
146             for (r = 0; r < n; r += 2*m) {
147 
148                 u0 = a[r+j];
149                 v0 = a[r+j+mhalf];
150 
151                 u1 = a[m+r+j];
152                 v1 = a[m+r+j+mhalf];
153 
154                 a[r+j] = addmod(u0, v0, umod);
155                 v0 = submod(u0, v0, umod);
156 
157                 a[m+r+j] = addmod(u1, v1, umod);
158                 v1 = submod(u1, v1, umod);
159 
160                 MULMOD2C(&v0, &v1, w);
161 
162                 a[r+j+mhalf] = v0;
163                 a[m+r+j+mhalf] = v1;
164             }
165 
166         }
167 
168     }
169 
170     bitreverse_permute(a, n);
171 }
172 
173 
174