1 /*
2 Fast Fourier/Cosine/Sine Transform
3 dimension :three
4 data length :power of 2
5 decimation :frequency
6 radix :split-radix, row-column
7 data :inplace
8 table :use
9 functions
10 cdft3d: Complex Discrete Fourier Transform
11 rdft3d: Real Discrete Fourier Transform
12 ddct3d: Discrete Cosine Transform
13 ddst3d: Discrete Sine Transform
14 function prototypes
15 void cdft3d(int, int, int, int, double ***, double *, int *, double *);
16 void rdft3d(int, int, int, int, double ***, double *, int *, double *);
17 void rdft3dsort(int, int, int, int, double ***);
18 void ddct3d(int, int, int, int, double ***, double *, int *, double *);
19 void ddst3d(int, int, int, int, double ***, double *, int *, double *);
20 necessary package
21 fftsg.c : 1D-FFT package
22 macro definitions
23 USE_FFT3D_PTHREADS : default=not defined
24 FFT3D_MAX_THREADS : must be 2^N, default=4
25 FFT3D_THREADS_BEGIN_N : default=65536
26 USE_FFT3D_WINTHREADS : default=not defined
27 FFT3D_MAX_THREADS : must be 2^N, default=4
28 FFT3D_THREADS_BEGIN_N : default=131072
29
30
31 -------- Complex DFT (Discrete Fourier Transform) --------
32 [definition]
33 <case1>
34 X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
35 x[j1][j2][j3] *
36 exp(2*pi*i*j1*k1/n1) *
37 exp(2*pi*i*j2*k2/n2) *
38 exp(2*pi*i*j3*k3/n3),
39 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
40 <case2>
41 X[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
42 x[j1][j2][j3] *
43 exp(-2*pi*i*j1*k1/n1) *
44 exp(-2*pi*i*j2*k2/n2) *
45 exp(-2*pi*i*j3*k3/n3),
46 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
47 (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
48 [usage]
49 <case1>
50 ip[0] = 0; // first time only
51 cdft3d(n1, n2, 2*n3, 1, a, t, ip, w);
52 <case2>
53 ip[0] = 0; // first time only
54 cdft3d(n1, n2, 2*n3, -1, a, t, ip, w);
55 [parameters]
56 n1 :data length (int)
57 n1 >= 1, n1 = power of 2
58 n2 :data length (int)
59 n2 >= 1, n2 = power of 2
60 2*n3 :data length (int)
61 n3 >= 1, n3 = power of 2
62 a[0...n1-1][0...n2-1][0...2*n3-1]
63 :input/output data (double ***)
64 input data
65 a[j1][j2][2*j3] = Re(x[j1][j2][j3]),
66 a[j1][j2][2*j3+1] = Im(x[j1][j2][j3]),
67 0<=j1<n1, 0<=j2<n2, 0<=j3<n3
68 output data
69 a[k1][k2][2*k3] = Re(X[k1][k2][k3]),
70 a[k1][k2][2*k3+1] = Im(X[k1][k2][k3]),
71 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
72 t[0...*]
73 :work area (double *)
74 length of t >= max(8*n1, 8*n2), if single thread,
75 length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS,
76 if multi threads,
77 t is dynamically allocated, if t == NULL.
78 ip[0...*]
79 :work area for bit reversal (int *)
80 length of ip >= 2+sqrt(n)
81 (n = max(n1, n2, n3))
82 ip[0],ip[1] are pointers of the cos/sin table.
83 w[0...*]
84 :cos/sin table (double *)
85 length of w >= max(n1/2, n2/2, n3/2)
86 w[],ip[] are initialized if ip[0] == 0.
87 [remark]
88 Inverse of
89 cdft3d(n1, n2, 2*n3, -1, a, t, ip, w);
90 is
91 cdft3d(n1, n2, 2*n3, 1, a, t, ip, w);
92 for (j1 = 0; j1 <= n1 - 1; j1++) {
93 for (j2 = 0; j2 <= n2 - 1; j2++) {
94 for (j3 = 0; j3 <= 2 * n3 - 1; j3++) {
95 a[j1][j2][j3] *= 1.0 / n1 / n2 / n3;
96 }
97 }
98 }
99 .
100
101
102 -------- Real DFT / Inverse of Real DFT --------
103 [definition]
104 <case1> RDFT
105 R[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
106 a[j1][j2][j3] *
107 cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
108 2*pi*j3*k3/n3),
109 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
110 I[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
111 a[j1][j2][j3] *
112 sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
113 2*pi*j3*k3/n3),
114 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
115 <case2> IRDFT (excluding scale)
116 a[k1][k2][k3] = (1/2) * sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
117 (R[j1][j2][j3] *
118 cos(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
119 2*pi*j3*k3/n3) +
120 I[j1][j2][j3] *
121 sin(2*pi*j1*k1/n1 + 2*pi*j2*k2/n2 +
122 2*pi*j3*k3/n3)),
123 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
124 (notes: R[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = R[k1][k2][k3],
125 I[(n1-k1)%n1][(n2-k2)%n2][(n3-k3)%n3] = -I[k1][k2][k3],
126 0<=k1<n1, 0<=k2<n2, 0<=k3<n3)
127 [usage]
128 <case1>
129 ip[0] = 0; // first time only
130 rdft3d(n1, n2, n3, 1, a, t, ip, w);
131 <case2>
132 ip[0] = 0; // first time only
133 rdft3d(n1, n2, n3, -1, a, t, ip, w);
134 [parameters]
135 n1 :data length (int)
136 n1 >= 2, n1 = power of 2
137 n2 :data length (int)
138 n2 >= 2, n2 = power of 2
139 n3 :data length (int)
140 n3 >= 2, n3 = power of 2
141 a[0...n1-1][0...n2-1][0...n3-1]
142 :input/output data (double ***)
143 <case1>
144 output data
145 a[k1][k2][2*k3] = R[k1][k2][k3]
146 = R[(n1-k1)%n1][(n2-k2)%n2][n3-k3],
147 a[k1][k2][2*k3+1] = I[k1][k2][k3]
148 = -I[(n1-k1)%n1][(n2-k2)%n2][n3-k3],
149 0<=k1<n1, 0<=k2<n2, 0<k3<n3/2,
150 (n%m : n mod m),
151 a[k1][k2][0] = R[k1][k2][0]
152 = R[(n1-k1)%n1][n2-k2][0],
153 a[k1][k2][1] = I[k1][k2][0]
154 = -I[(n1-k1)%n1][n2-k2][0],
155 a[k1][n2-k2][1] = R[(n1-k1)%n1][k2][n3/2]
156 = R[k1][n2-k2][n3/2],
157 a[k1][n2-k2][0] = -I[(n1-k1)%n1][k2][n3/2]
158 = I[k1][n2-k2][n3/2],
159 0<=k1<n1, 0<k2<n2/2,
160 a[k1][0][0] = R[k1][0][0]
161 = R[n1-k1][0][0],
162 a[k1][0][1] = I[k1][0][0]
163 = -I[n1-k1][0][0],
164 a[k1][n2/2][0] = R[k1][n2/2][0]
165 = R[n1-k1][n2/2][0],
166 a[k1][n2/2][1] = I[k1][n2/2][0]
167 = -I[n1-k1][n2/2][0],
168 a[n1-k1][0][1] = R[k1][0][n3/2]
169 = R[n1-k1][0][n3/2],
170 a[n1-k1][0][0] = -I[k1][0][n3/2]
171 = I[n1-k1][0][n3/2],
172 a[n1-k1][n2/2][1] = R[k1][n2/2][n3/2]
173 = R[n1-k1][n2/2][n3/2],
174 a[n1-k1][n2/2][0] = -I[k1][n2/2][n3/2]
175 = I[n1-k1][n2/2][n3/2],
176 0<k1<n1/2,
177 a[0][0][0] = R[0][0][0],
178 a[0][0][1] = R[0][0][n3/2],
179 a[0][n2/2][0] = R[0][n2/2][0],
180 a[0][n2/2][1] = R[0][n2/2][n3/2],
181 a[n1/2][0][0] = R[n1/2][0][0],
182 a[n1/2][0][1] = R[n1/2][0][n3/2],
183 a[n1/2][n2/2][0] = R[n1/2][n2/2][0],
184 a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2]
185 <case2>
186 input data
187 a[j1][j2][2*j3] = R[j1][j2][j3]
188 = R[(n1-j1)%n1][(n2-j2)%n2][n3-j3],
189 a[j1][j2][2*j3+1] = I[j1][j2][j3]
190 = -I[(n1-j1)%n1][(n2-j2)%n2][n3-j3],
191 0<=j1<n1, 0<=j2<n2, 0<j3<n3/2,
192 a[j1][j2][0] = R[j1][j2][0]
193 = R[(n1-j1)%n1][n2-j2][0],
194 a[j1][j2][1] = I[j1][j2][0]
195 = -I[(n1-j1)%n1][n2-j2][0],
196 a[j1][n2-j2][1] = R[(n1-j1)%n1][j2][n3/2]
197 = R[j1][n2-j2][n3/2],
198 a[j1][n2-j2][0] = -I[(n1-j1)%n1][j2][n3/2]
199 = I[j1][n2-j2][n3/2],
200 0<=j1<n1, 0<j2<n2/2,
201 a[j1][0][0] = R[j1][0][0]
202 = R[n1-j1][0][0],
203 a[j1][0][1] = I[j1][0][0]
204 = -I[n1-j1][0][0],
205 a[j1][n2/2][0] = R[j1][n2/2][0]
206 = R[n1-j1][n2/2][0],
207 a[j1][n2/2][1] = I[j1][n2/2][0]
208 = -I[n1-j1][n2/2][0],
209 a[n1-j1][0][1] = R[j1][0][n3/2]
210 = R[n1-j1][0][n3/2],
211 a[n1-j1][0][0] = -I[j1][0][n3/2]
212 = I[n1-j1][0][n3/2],
213 a[n1-j1][n2/2][1] = R[j1][n2/2][n3/2]
214 = R[n1-j1][n2/2][n3/2],
215 a[n1-j1][n2/2][0] = -I[j1][n2/2][n3/2]
216 = I[n1-j1][n2/2][n3/2],
217 0<j1<n1/2,
218 a[0][0][0] = R[0][0][0],
219 a[0][0][1] = R[0][0][n3/2],
220 a[0][n2/2][0] = R[0][n2/2][0],
221 a[0][n2/2][1] = R[0][n2/2][n3/2],
222 a[n1/2][0][0] = R[n1/2][0][0],
223 a[n1/2][0][1] = R[n1/2][0][n3/2],
224 a[n1/2][n2/2][0] = R[n1/2][n2/2][0],
225 a[n1/2][n2/2][1] = R[n1/2][n2/2][n3/2]
226 ---- output ordering ----
227 rdft3d(n1, n2, n3, 1, a, t, ip, w);
228 rdft3dsort(n1, n2, n3, 1, a);
229 // stored data is a[0...n1-1][0...n2-1][0...n3+1]:
230 // a[k1][k2][2*k3] = R[k1][k2][k3],
231 // a[k1][k2][2*k3+1] = I[k1][k2][k3],
232 // 0<=k1<n1, 0<=k2<n2, 0<=k3<=n3/2.
233 // the stored data is larger than the input data!
234 ---- input ordering ----
235 rdft3dsort(n1, n2, n3, -1, a);
236 rdft3d(n1, n2, n3, -1, a, t, ip, w);
237 t[0...*]
238 :work area (double *)
239 length of t >= max(8*n1, 8*n2), if single thread,
240 length of t >= max(8*n1, 8*n2)*FFT3D_MAX_THREADS,
241 if multi threads,
242 t is dynamically allocated, if t == NULL.
243 ip[0...*]
244 :work area for bit reversal (int *)
245 length of ip >= 2+sqrt(n)
246 (n = max(n1, n2, n3/2))
247 ip[0],ip[1] are pointers of the cos/sin table.
248 w[0...*]
249 :cos/sin table (double *)
250 length of w >= max(n1/2, n2/2, n3/4) + n3/4
251 w[],ip[] are initialized if ip[0] == 0.
252 [remark]
253 Inverse of
254 rdft3d(n1, n2, n3, 1, a, t, ip, w);
255 is
256 rdft3d(n1, n2, n3, -1, a, t, ip, w);
257 for (j1 = 0; j1 <= n1 - 1; j1++) {
258 for (j2 = 0; j2 <= n2 - 1; j2++) {
259 for (j3 = 0; j3 <= n3 - 1; j3++) {
260 a[j1][j2][j3] *= 2.0 / n1 / n2 / n3;
261 }
262 }
263 }
264 .
265
266
267 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
268 [definition]
269 <case1> IDCT (excluding scale)
270 C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
271 a[j1][j2][j3] *
272 cos(pi*j1*(k1+1/2)/n1) *
273 cos(pi*j2*(k2+1/2)/n2) *
274 cos(pi*j3*(k3+1/2)/n3),
275 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
276 <case2> DCT
277 C[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
278 a[j1][j2][j3] *
279 cos(pi*(j1+1/2)*k1/n1) *
280 cos(pi*(j2+1/2)*k2/n2) *
281 cos(pi*(j3+1/2)*k3/n3),
282 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
283 [usage]
284 <case1>
285 ip[0] = 0; // first time only
286 ddct3d(n1, n2, n3, 1, a, t, ip, w);
287 <case2>
288 ip[0] = 0; // first time only
289 ddct3d(n1, n2, n3, -1, a, t, ip, w);
290 [parameters]
291 n1 :data length (int)
292 n1 >= 2, n1 = power of 2
293 n2 :data length (int)
294 n2 >= 2, n2 = power of 2
295 n3 :data length (int)
296 n3 >= 2, n3 = power of 2
297 a[0...n1-1][0...n2-1][0...n3-1]
298 :input/output data (double ***)
299 output data
300 a[k1][k2][k3] = C[k1][k2][k3],
301 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
302 t[0...*]
303 :work area (double *)
304 length of t >= max(4*n1, 4*n2), if single thread,
305 length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS,
306 if multi threads,
307 t is dynamically allocated, if t == NULL.
308 ip[0...*]
309 :work area for bit reversal (int *)
310 length of ip >= 2+sqrt(n)
311 (n = max(n1/2, n2/2, n3/2))
312 ip[0],ip[1] are pointers of the cos/sin table.
313 w[0...*]
314 :cos/sin table (double *)
315 length of w >= max(n1*3/2, n2*3/2, n3*3/2)
316 w[],ip[] are initialized if ip[0] == 0.
317 [remark]
318 Inverse of
319 ddct3d(n1, n2, n3, -1, a, t, ip, w);
320 is
321 for (j1 = 0; j1 <= n1 - 1; j1++) {
322 for (j2 = 0; j2 <= n2 - 1; j2++) {
323 a[j1][j2][0] *= 0.5;
324 }
325 for (j3 = 0; j3 <= n3 - 1; j3++) {
326 a[j1][0][j3] *= 0.5;
327 }
328 }
329 for (j2 = 0; j2 <= n2 - 1; j2++) {
330 for (j3 = 0; j3 <= n3 - 1; j3++) {
331 a[0][j2][j3] *= 0.5;
332 }
333 }
334 ddct3d(n1, n2, n3, 1, a, t, ip, w);
335 for (j1 = 0; j1 <= n1 - 1; j1++) {
336 for (j2 = 0; j2 <= n2 - 1; j2++) {
337 for (j3 = 0; j3 <= n3 - 1; j3++) {
338 a[j1][j2][j3] *= 8.0 / n1 / n2 / n3;
339 }
340 }
341 }
342 .
343
344
345 -------- DST (Discrete Sine Transform) / Inverse of DST --------
346 [definition]
347 <case1> IDST (excluding scale)
348 S[k1][k2][k3] = sum_j1=1^n1 sum_j2=1^n2 sum_j3=1^n3
349 A[j1][j2][j3] *
350 sin(pi*j1*(k1+1/2)/n1) *
351 sin(pi*j2*(k2+1/2)/n2) *
352 sin(pi*j3*(k3+1/2)/n3),
353 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
354 <case2> DST
355 S[k1][k2][k3] = sum_j1=0^n1-1 sum_j2=0^n2-1 sum_j3=0^n3-1
356 a[j1][j2][j3] *
357 sin(pi*(j1+1/2)*k1/n1) *
358 sin(pi*(j2+1/2)*k2/n2) *
359 sin(pi*(j3+1/2)*k3/n3),
360 0<k1<=n1, 0<k2<=n2, 0<k3<=n3
361 [usage]
362 <case1>
363 ip[0] = 0; // first time only
364 ddst3d(n1, n2, n3, 1, a, t, ip, w);
365 <case2>
366 ip[0] = 0; // first time only
367 ddst3d(n1, n2, n3, -1, a, t, ip, w);
368 [parameters]
369 n1 :data length (int)
370 n1 >= 2, n1 = power of 2
371 n2 :data length (int)
372 n2 >= 2, n2 = power of 2
373 n3 :data length (int)
374 n3 >= 2, n3 = power of 2
375 a[0...n1-1][0...n2-1][0...n3-1]
376 :input/output data (double ***)
377 <case1>
378 input data
379 a[j1%n1][j2%n2][j3%n3] = A[j1][j2][j3],
380 0<j1<=n1, 0<j2<=n2, 0<j3<=n3,
381 (n%m : n mod m)
382 output data
383 a[k1][k2][k3] = S[k1][k2][k3],
384 0<=k1<n1, 0<=k2<n2, 0<=k3<n3
385 <case2>
386 output data
387 a[k1%n1][k2%n2][k3%n3] = S[k1][k2][k3],
388 0<k1<=n1, 0<k2<=n2, 0<k3<=n3
389 t[0...*]
390 :work area (double *)
391 length of t >= max(4*n1, 4*n2), if single thread,
392 length of t >= max(4*n1, 4*n2)*FFT3D_MAX_THREADS,
393 if multi threads,
394 t is dynamically allocated, if t == NULL.
395 ip[0...*]
396 :work area for bit reversal (int *)
397 length of ip >= 2+sqrt(n)
398 (n = max(n1/2, n2/2, n3/2))
399 ip[0],ip[1] are pointers of the cos/sin table.
400 w[0...*]
401 :cos/sin table (double *)
402 length of w >= max(n1*3/2, n2*3/2, n3*3/2)
403 w[],ip[] are initialized if ip[0] == 0.
404 [remark]
405 Inverse of
406 ddst3d(n1, n2, n3, -1, a, t, ip, w);
407 is
408 for (j1 = 0; j1 <= n1 - 1; j1++) {
409 for (j2 = 0; j2 <= n2 - 1; j2++) {
410 a[j1][j2][0] *= 0.5;
411 }
412 for (j3 = 0; j3 <= n3 - 1; j3++) {
413 a[j1][0][j3] *= 0.5;
414 }
415 }
416 for (j2 = 0; j2 <= n2 - 1; j2++) {
417 for (j3 = 0; j3 <= n3 - 1; j3++) {
418 a[0][j2][j3] *= 0.5;
419 }
420 }
421 ddst3d(n1, n2, n3, 1, a, t, ip, w);
422 for (j1 = 0; j1 <= n1 - 1; j1++) {
423 for (j2 = 0; j2 <= n2 - 1; j2++) {
424 for (j3 = 0; j3 <= n3 - 1; j3++) {
425 a[j1][j2][j3] *= 8.0 / n1 / n2 / n3;
426 }
427 }
428 }
429 .
430 */
431
432
433 #include <stdio.h>
434 #include <stdlib.h>
435 #define fft3d_alloc_error_check(p) { \
436 if ((p) == NULL) { \
437 fprintf(stderr, "fft3d memory allocation error\n"); \
438 exit(1); \
439 } \
440 }
441
442
443 #ifdef USE_FFT3D_PTHREADS
444 #define USE_FFT3D_THREADS
445 #ifndef FFT3D_MAX_THREADS
446 #define FFT3D_MAX_THREADS 4
447 #endif
448 #ifndef FFT3D_THREADS_BEGIN_N
449 #define FFT3D_THREADS_BEGIN_N 65536
450 #endif
451 #include <pthread.h>
452 #define fft3d_thread_t pthread_t
453 #define fft3d_thread_create(thp,func,argp) { \
454 if (pthread_create(thp, NULL, func, (void *) (argp)) != 0) { \
455 fprintf(stderr, "fft3d thread error\n"); \
456 exit(1); \
457 } \
458 }
459 #define fft3d_thread_wait(th) { \
460 if (pthread_join(th, NULL) != 0) { \
461 fprintf(stderr, "fft3d thread error\n"); \
462 exit(1); \
463 } \
464 }
465 #endif /* USE_FFT3D_PTHREADS */
466
467
468 #ifdef USE_FFT3D_WINTHREADS
469 #define USE_FFT3D_THREADS
470 #ifndef FFT3D_MAX_THREADS
471 #define FFT3D_MAX_THREADS 4
472 #endif
473 #ifndef FFT3D_THREADS_BEGIN_N
474 #define FFT3D_THREADS_BEGIN_N 131072
475 #endif
476 #include <windows.h>
477 #define fft3d_thread_t HANDLE
478 #define fft3d_thread_create(thp,func,argp) { \
479 DWORD thid; \
480 *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) (func), (LPVOID) (argp), 0, &thid); \
481 if (*(thp) == 0) { \
482 fprintf(stderr, "fft3d thread error\n"); \
483 exit(1); \
484 } \
485 }
486 #define fft3d_thread_wait(th) { \
487 WaitForSingleObject(th, INFINITE); \
488 CloseHandle(th); \
489 }
490 #endif /* USE_FFT3D_WINTHREADS */
491
492
cdft3d(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)493 void cdft3d(int n1, int n2, int n3, int isgn, double ***a,
494 double *t, int *ip, double *w)
495 {
496 void makewt(int nw, int *ip, double *w);
497 void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
498 double ***a, double *t, int *ip, double *w);
499 void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
500 double *t, int *ip, double *w);
501 #ifdef USE_FFT3D_THREADS
502 void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
503 double ***a, double *t, int *ip, double *w);
504 void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
505 double *t, int *ip, double *w);
506 #endif /* USE_FFT3D_THREADS */
507 int n, itnull, nt;
508
509 n = n1;
510 if (n < n2) {
511 n = n2;
512 }
513 n <<= 1;
514 if (n < n3) {
515 n = n3;
516 }
517 if (n > (ip[0] << 2)) {
518 makewt(n >> 2, ip, w);
519 }
520 itnull = 0;
521 if (t == NULL) {
522 itnull = 1;
523 nt = n1;
524 if (nt < n2) {
525 nt = n2;
526 }
527 nt *= 8;
528 #ifdef USE_FFT3D_THREADS
529 nt *= FFT3D_MAX_THREADS;
530 #endif /* USE_FFT3D_THREADS */
531 if (n3 == 4) {
532 nt >>= 1;
533 } else if (n3 < 4) {
534 nt >>= 2;
535 }
536 t = (double *) malloc(sizeof(double) * nt);
537 fft3d_alloc_error_check(t);
538 }
539 #ifdef USE_FFT3D_THREADS
540 if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
541 xdft3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
542 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
543 } else
544 #endif /* USE_FFT3D_THREADS */
545 {
546 xdft3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
547 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
548 }
549 if (itnull != 0) {
550 free(t);
551 }
552 }
553
554
rdft3d(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)555 void rdft3d(int n1, int n2, int n3, int isgn, double ***a,
556 double *t, int *ip, double *w)
557 {
558 void makewt(int nw, int *ip, double *w);
559 void makect(int nc, int *ip, double *c);
560 void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
561 double ***a, double *t, int *ip, double *w);
562 void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
563 double *t, int *ip, double *w);
564 void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a);
565 #ifdef USE_FFT3D_THREADS
566 void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
567 double ***a, double *t, int *ip, double *w);
568 void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
569 double *t, int *ip, double *w);
570 #endif /* USE_FFT3D_THREADS */
571 int n, nw, nc, itnull, nt;
572
573 n = n1;
574 if (n < n2) {
575 n = n2;
576 }
577 n <<= 1;
578 if (n < n3) {
579 n = n3;
580 }
581 nw = ip[0];
582 if (n > (nw << 2)) {
583 nw = n >> 2;
584 makewt(nw, ip, w);
585 }
586 nc = ip[1];
587 if (n3 > (nc << 2)) {
588 nc = n3 >> 2;
589 makect(nc, ip, w + nw);
590 }
591 itnull = 0;
592 if (t == NULL) {
593 itnull = 1;
594 nt = n1;
595 if (nt < n2) {
596 nt = n2;
597 }
598 nt *= 8;
599 #ifdef USE_FFT3D_THREADS
600 nt *= FFT3D_MAX_THREADS;
601 #endif /* USE_FFT3D_THREADS */
602 if (n3 == 4) {
603 nt >>= 1;
604 } else if (n3 < 4) {
605 nt >>= 2;
606 }
607 t = (double *) malloc(sizeof(double) * nt);
608 fft3d_alloc_error_check(t);
609 }
610 #ifdef USE_FFT3D_THREADS
611 if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
612 if (isgn < 0) {
613 rdft3d_sub(n1, n2, n3, isgn, a);
614 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
615 }
616 xdft3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
617 if (isgn >= 0) {
618 cdft3db_subth(n1, n2, n3, isgn, a, t, ip, w);
619 rdft3d_sub(n1, n2, n3, isgn, a);
620 }
621 } else
622 #endif /* USE_FFT3D_THREADS */
623 {
624 if (isgn < 0) {
625 rdft3d_sub(n1, n2, n3, isgn, a);
626 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
627 }
628 xdft3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
629 if (isgn >= 0) {
630 cdft3db_sub(n1, n2, n3, isgn, a, t, ip, w);
631 rdft3d_sub(n1, n2, n3, isgn, a);
632 }
633 }
634 if (itnull != 0) {
635 free(t);
636 }
637 }
638
639
rdft3dsort(int n1,int n2,int n3,int isgn,double *** a)640 void rdft3dsort(int n1, int n2, int n3, int isgn, double ***a)
641 {
642 int n1h, n2h, i, j;
643 double x, y;
644
645 n1h = n1 >> 1;
646 n2h = n2 >> 1;
647 if (isgn < 0) {
648 for (i = 0; i < n1; i++) {
649 for (j = n2h + 1; j < n2; j++) {
650 a[i][j][0] = a[i][j][n3 + 1];
651 a[i][j][1] = a[i][j][n3];
652 }
653 }
654 for (i = n1h + 1; i < n1; i++) {
655 a[i][0][0] = a[i][0][n3 + 1];
656 a[i][0][1] = a[i][0][n3];
657 a[i][n2h][0] = a[i][n2h][n3 + 1];
658 a[i][n2h][1] = a[i][n2h][n3];
659 }
660 a[0][0][1] = a[0][0][n3];
661 a[0][n2h][1] = a[0][n2h][n3];
662 a[n1h][0][1] = a[n1h][0][n3];
663 a[n1h][n2h][1] = a[n1h][n2h][n3];
664 } else {
665 for (j = n2h + 1; j < n2; j++) {
666 y = a[0][j][0];
667 x = a[0][j][1];
668 a[0][j][n3] = x;
669 a[0][j][n3 + 1] = y;
670 a[0][n2 - j][n3] = x;
671 a[0][n2 - j][n3 + 1] = -y;
672 a[0][j][0] = a[0][n2 - j][0];
673 a[0][j][1] = -a[0][n2 - j][1];
674 }
675 for (i = 1; i < n1; i++) {
676 for (j = n2h + 1; j < n2; j++) {
677 y = a[i][j][0];
678 x = a[i][j][1];
679 a[i][j][n3] = x;
680 a[i][j][n3 + 1] = y;
681 a[n1 - i][n2 - j][n3] = x;
682 a[n1 - i][n2 - j][n3 + 1] = -y;
683 a[i][j][0] = a[n1 - i][n2 - j][0];
684 a[i][j][1] = -a[n1 - i][n2 - j][1];
685 }
686 }
687 for (i = n1h + 1; i < n1; i++) {
688 y = a[i][0][0];
689 x = a[i][0][1];
690 a[i][0][n3] = x;
691 a[i][0][n3 + 1] = y;
692 a[n1 - i][0][n3] = x;
693 a[n1 - i][0][n3 + 1] = -y;
694 a[i][0][0] = a[n1 - i][0][0];
695 a[i][0][1] = -a[n1 - i][0][1];
696 y = a[i][n2h][0];
697 x = a[i][n2h][1];
698 a[i][n2h][n3] = x;
699 a[i][n2h][n3 + 1] = y;
700 a[n1 - i][n2h][n3] = x;
701 a[n1 - i][n2h][n3 + 1] = -y;
702 a[i][n2h][0] = a[n1 - i][n2h][0];
703 a[i][n2h][1] = -a[n1 - i][n2h][1];
704 }
705 a[0][0][n3] = a[0][0][1];
706 a[0][0][n3 + 1] = 0;
707 a[0][0][1] = 0;
708 a[0][n2h][n3] = a[0][n2h][1];
709 a[0][n2h][n3 + 1] = 0;
710 a[0][n2h][1] = 0;
711 a[n1h][0][n3] = a[n1h][0][1];
712 a[n1h][0][n3 + 1] = 0;
713 a[n1h][0][1] = 0;
714 a[n1h][n2h][n3] = a[n1h][n2h][1];
715 a[n1h][n2h][n3 + 1] = 0;
716 a[n1h][n2h][1] = 0;
717 }
718 }
719
720
ddct3d(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)721 void ddct3d(int n1, int n2, int n3, int isgn, double ***a,
722 double *t, int *ip, double *w)
723 {
724 void makewt(int nw, int *ip, double *w);
725 void makect(int nc, int *ip, double *c);
726 void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
727 double ***a, double *t, int *ip, double *w);
728 void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
729 double ***a, double *t, int *ip, double *w);
730 #ifdef USE_FFT3D_THREADS
731 void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
732 double ***a, double *t, int *ip, double *w);
733 void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
734 double ***a, double *t, int *ip, double *w);
735 #endif /* USE_FFT3D_THREADS */
736 int n, nw, nc, itnull, nt;
737
738 n = n1;
739 if (n < n2) {
740 n = n2;
741 }
742 if (n < n3) {
743 n = n3;
744 }
745 nw = ip[0];
746 if (n > (nw << 2)) {
747 nw = n >> 2;
748 makewt(nw, ip, w);
749 }
750 nc = ip[1];
751 if (n > nc) {
752 nc = n;
753 makect(nc, ip, w + nw);
754 }
755 itnull = 0;
756 if (t == NULL) {
757 itnull = 1;
758 nt = n1;
759 if (nt < n2) {
760 nt = n2;
761 }
762 nt *= 4;
763 #ifdef USE_FFT3D_THREADS
764 nt *= FFT3D_MAX_THREADS;
765 #endif /* USE_FFT3D_THREADS */
766 if (n3 == 2) {
767 nt >>= 1;
768 }
769 t = (double *) malloc(sizeof(double) * nt);
770 fft3d_alloc_error_check(t);
771 }
772 #ifdef USE_FFT3D_THREADS
773 if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
774 ddxt3da_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
775 ddxt3db_subth(n1, n2, n3, 0, isgn, a, t, ip, w);
776 } else
777 #endif /* USE_FFT3D_THREADS */
778 {
779 ddxt3da_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
780 ddxt3db_sub(n1, n2, n3, 0, isgn, a, t, ip, w);
781 }
782 if (itnull != 0) {
783 free(t);
784 }
785 }
786
787
ddst3d(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)788 void ddst3d(int n1, int n2, int n3, int isgn, double ***a,
789 double *t, int *ip, double *w)
790 {
791 void makewt(int nw, int *ip, double *w);
792 void makect(int nc, int *ip, double *c);
793 void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
794 double ***a, double *t, int *ip, double *w);
795 void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
796 double ***a, double *t, int *ip, double *w);
797 #ifdef USE_FFT3D_THREADS
798 void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
799 double ***a, double *t, int *ip, double *w);
800 void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
801 double ***a, double *t, int *ip, double *w);
802 #endif /* USE_FFT3D_THREADS */
803 int n, nw, nc, itnull, nt;
804
805 n = n1;
806 if (n < n2) {
807 n = n2;
808 }
809 if (n < n3) {
810 n = n3;
811 }
812 nw = ip[0];
813 if (n > (nw << 2)) {
814 nw = n >> 2;
815 makewt(nw, ip, w);
816 }
817 nc = ip[1];
818 if (n > nc) {
819 nc = n;
820 makect(nc, ip, w + nw);
821 }
822 itnull = 0;
823 if (t == NULL) {
824 itnull = 1;
825 nt = n1;
826 if (nt < n2) {
827 nt = n2;
828 }
829 nt *= 4;
830 #ifdef USE_FFT3D_THREADS
831 nt *= FFT3D_MAX_THREADS;
832 #endif /* USE_FFT3D_THREADS */
833 if (n3 == 2) {
834 nt >>= 1;
835 }
836 t = (double *) malloc(sizeof(double) * nt);
837 fft3d_alloc_error_check(t);
838 }
839 #ifdef USE_FFT3D_THREADS
840 if ((double) n1 * n2 * n3 >= (double) FFT3D_THREADS_BEGIN_N) {
841 ddxt3da_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
842 ddxt3db_subth(n1, n2, n3, 1, isgn, a, t, ip, w);
843 } else
844 #endif /* USE_FFT3D_THREADS */
845 {
846 ddxt3da_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
847 ddxt3db_sub(n1, n2, n3, 1, isgn, a, t, ip, w);
848 }
849 if (itnull != 0) {
850 free(t);
851 }
852 }
853
854
855 /* -------- child routines -------- */
856
857
xdft3da_sub(int n1,int n2,int n3,int icr,int isgn,double *** a,double * t,int * ip,double * w)858 void xdft3da_sub(int n1, int n2, int n3, int icr, int isgn,
859 double ***a, double *t, int *ip, double *w)
860 {
861 void cdft(int n, int isgn, double *a, int *ip, double *w);
862 void rdft(int n, int isgn, double *a, int *ip, double *w);
863 int i, j, k;
864
865 for (i = 0; i < n1; i++) {
866 if (icr == 0) {
867 for (j = 0; j < n2; j++) {
868 cdft(n3, isgn, a[i][j], ip, w);
869 }
870 } else if (isgn >= 0) {
871 for (j = 0; j < n2; j++) {
872 rdft(n3, isgn, a[i][j], ip, w);
873 }
874 }
875 if (n3 > 4) {
876 for (k = 0; k < n3; k += 8) {
877 for (j = 0; j < n2; j++) {
878 t[2 * j] = a[i][j][k];
879 t[2 * j + 1] = a[i][j][k + 1];
880 t[2 * n2 + 2 * j] = a[i][j][k + 2];
881 t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
882 t[4 * n2 + 2 * j] = a[i][j][k + 4];
883 t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
884 t[6 * n2 + 2 * j] = a[i][j][k + 6];
885 t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
886 }
887 cdft(2 * n2, isgn, t, ip, w);
888 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
889 cdft(2 * n2, isgn, &t[4 * n2], ip, w);
890 cdft(2 * n2, isgn, &t[6 * n2], ip, w);
891 for (j = 0; j < n2; j++) {
892 a[i][j][k] = t[2 * j];
893 a[i][j][k + 1] = t[2 * j + 1];
894 a[i][j][k + 2] = t[2 * n2 + 2 * j];
895 a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
896 a[i][j][k + 4] = t[4 * n2 + 2 * j];
897 a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
898 a[i][j][k + 6] = t[6 * n2 + 2 * j];
899 a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
900 }
901 }
902 } else if (n3 == 4) {
903 for (j = 0; j < n2; j++) {
904 t[2 * j] = a[i][j][0];
905 t[2 * j + 1] = a[i][j][1];
906 t[2 * n2 + 2 * j] = a[i][j][2];
907 t[2 * n2 + 2 * j + 1] = a[i][j][3];
908 }
909 cdft(2 * n2, isgn, t, ip, w);
910 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
911 for (j = 0; j < n2; j++) {
912 a[i][j][0] = t[2 * j];
913 a[i][j][1] = t[2 * j + 1];
914 a[i][j][2] = t[2 * n2 + 2 * j];
915 a[i][j][3] = t[2 * n2 + 2 * j + 1];
916 }
917 } else if (n3 == 2) {
918 for (j = 0; j < n2; j++) {
919 t[2 * j] = a[i][j][0];
920 t[2 * j + 1] = a[i][j][1];
921 }
922 cdft(2 * n2, isgn, t, ip, w);
923 for (j = 0; j < n2; j++) {
924 a[i][j][0] = t[2 * j];
925 a[i][j][1] = t[2 * j + 1];
926 }
927 }
928 if (icr != 0 && isgn < 0) {
929 for (j = 0; j < n2; j++) {
930 rdft(n3, isgn, a[i][j], ip, w);
931 }
932 }
933 }
934 }
935
936
cdft3db_sub(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)937 void cdft3db_sub(int n1, int n2, int n3, int isgn, double ***a,
938 double *t, int *ip, double *w)
939 {
940 void cdft(int n, int isgn, double *a, int *ip, double *w);
941 int i, j, k;
942
943 if (n3 > 4) {
944 for (j = 0; j < n2; j++) {
945 for (k = 0; k < n3; k += 8) {
946 for (i = 0; i < n1; i++) {
947 t[2 * i] = a[i][j][k];
948 t[2 * i + 1] = a[i][j][k + 1];
949 t[2 * n1 + 2 * i] = a[i][j][k + 2];
950 t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
951 t[4 * n1 + 2 * i] = a[i][j][k + 4];
952 t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
953 t[6 * n1 + 2 * i] = a[i][j][k + 6];
954 t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
955 }
956 cdft(2 * n1, isgn, t, ip, w);
957 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
958 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
959 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
960 for (i = 0; i < n1; i++) {
961 a[i][j][k] = t[2 * i];
962 a[i][j][k + 1] = t[2 * i + 1];
963 a[i][j][k + 2] = t[2 * n1 + 2 * i];
964 a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
965 a[i][j][k + 4] = t[4 * n1 + 2 * i];
966 a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
967 a[i][j][k + 6] = t[6 * n1 + 2 * i];
968 a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
969 }
970 }
971 }
972 } else if (n3 == 4) {
973 for (j = 0; j < n2; j++) {
974 for (i = 0; i < n1; i++) {
975 t[2 * i] = a[i][j][0];
976 t[2 * i + 1] = a[i][j][1];
977 t[2 * n1 + 2 * i] = a[i][j][2];
978 t[2 * n1 + 2 * i + 1] = a[i][j][3];
979 }
980 cdft(2 * n1, isgn, t, ip, w);
981 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
982 for (i = 0; i < n1; i++) {
983 a[i][j][0] = t[2 * i];
984 a[i][j][1] = t[2 * i + 1];
985 a[i][j][2] = t[2 * n1 + 2 * i];
986 a[i][j][3] = t[2 * n1 + 2 * i + 1];
987 }
988 }
989 } else if (n3 == 2) {
990 for (j = 0; j < n2; j++) {
991 for (i = 0; i < n1; i++) {
992 t[2 * i] = a[i][j][0];
993 t[2 * i + 1] = a[i][j][1];
994 }
995 cdft(2 * n1, isgn, t, ip, w);
996 for (i = 0; i < n1; i++) {
997 a[i][j][0] = t[2 * i];
998 a[i][j][1] = t[2 * i + 1];
999 }
1000 }
1001 }
1002 }
1003
1004
rdft3d_sub(int n1,int n2,int n3,int isgn,double *** a)1005 void rdft3d_sub(int n1, int n2, int n3, int isgn, double ***a)
1006 {
1007 int n1h, n2h, i, j, k, l;
1008 double xi;
1009
1010 n1h = n1 >> 1;
1011 n2h = n2 >> 1;
1012 if (isgn < 0) {
1013 for (i = 1; i < n1h; i++) {
1014 j = n1 - i;
1015 xi = a[i][0][0] - a[j][0][0];
1016 a[i][0][0] += a[j][0][0];
1017 a[j][0][0] = xi;
1018 xi = a[j][0][1] - a[i][0][1];
1019 a[i][0][1] += a[j][0][1];
1020 a[j][0][1] = xi;
1021 xi = a[i][n2h][0] - a[j][n2h][0];
1022 a[i][n2h][0] += a[j][n2h][0];
1023 a[j][n2h][0] = xi;
1024 xi = a[j][n2h][1] - a[i][n2h][1];
1025 a[i][n2h][1] += a[j][n2h][1];
1026 a[j][n2h][1] = xi;
1027 for (k = 1; k < n2h; k++) {
1028 l = n2 - k;
1029 xi = a[i][k][0] - a[j][l][0];
1030 a[i][k][0] += a[j][l][0];
1031 a[j][l][0] = xi;
1032 xi = a[j][l][1] - a[i][k][1];
1033 a[i][k][1] += a[j][l][1];
1034 a[j][l][1] = xi;
1035 xi = a[j][k][0] - a[i][l][0];
1036 a[j][k][0] += a[i][l][0];
1037 a[i][l][0] = xi;
1038 xi = a[i][l][1] - a[j][k][1];
1039 a[j][k][1] += a[i][l][1];
1040 a[i][l][1] = xi;
1041 }
1042 }
1043 for (k = 1; k < n2h; k++) {
1044 l = n2 - k;
1045 xi = a[0][k][0] - a[0][l][0];
1046 a[0][k][0] += a[0][l][0];
1047 a[0][l][0] = xi;
1048 xi = a[0][l][1] - a[0][k][1];
1049 a[0][k][1] += a[0][l][1];
1050 a[0][l][1] = xi;
1051 xi = a[n1h][k][0] - a[n1h][l][0];
1052 a[n1h][k][0] += a[n1h][l][0];
1053 a[n1h][l][0] = xi;
1054 xi = a[n1h][l][1] - a[n1h][k][1];
1055 a[n1h][k][1] += a[n1h][l][1];
1056 a[n1h][l][1] = xi;
1057 }
1058 } else {
1059 for (i = 1; i < n1h; i++) {
1060 j = n1 - i;
1061 a[j][0][0] = 0.5 * (a[i][0][0] - a[j][0][0]);
1062 a[i][0][0] -= a[j][0][0];
1063 a[j][0][1] = 0.5 * (a[i][0][1] + a[j][0][1]);
1064 a[i][0][1] -= a[j][0][1];
1065 a[j][n2h][0] = 0.5 * (a[i][n2h][0] - a[j][n2h][0]);
1066 a[i][n2h][0] -= a[j][n2h][0];
1067 a[j][n2h][1] = 0.5 * (a[i][n2h][1] + a[j][n2h][1]);
1068 a[i][n2h][1] -= a[j][n2h][1];
1069 for (k = 1; k < n2h; k++) {
1070 l = n2 - k;
1071 a[j][l][0] = 0.5 * (a[i][k][0] - a[j][l][0]);
1072 a[i][k][0] -= a[j][l][0];
1073 a[j][l][1] = 0.5 * (a[i][k][1] + a[j][l][1]);
1074 a[i][k][1] -= a[j][l][1];
1075 a[i][l][0] = 0.5 * (a[j][k][0] - a[i][l][0]);
1076 a[j][k][0] -= a[i][l][0];
1077 a[i][l][1] = 0.5 * (a[j][k][1] + a[i][l][1]);
1078 a[j][k][1] -= a[i][l][1];
1079 }
1080 }
1081 for (k = 1; k < n2h; k++) {
1082 l = n2 - k;
1083 a[0][l][0] = 0.5 * (a[0][k][0] - a[0][l][0]);
1084 a[0][k][0] -= a[0][l][0];
1085 a[0][l][1] = 0.5 * (a[0][k][1] + a[0][l][1]);
1086 a[0][k][1] -= a[0][l][1];
1087 a[n1h][l][0] = 0.5 * (a[n1h][k][0] - a[n1h][l][0]);
1088 a[n1h][k][0] -= a[n1h][l][0];
1089 a[n1h][l][1] = 0.5 * (a[n1h][k][1] + a[n1h][l][1]);
1090 a[n1h][k][1] -= a[n1h][l][1];
1091 }
1092 }
1093 }
1094
1095
ddxt3da_sub(int n1,int n2,int n3,int ics,int isgn,double *** a,double * t,int * ip,double * w)1096 void ddxt3da_sub(int n1, int n2, int n3, int ics, int isgn,
1097 double ***a, double *t, int *ip, double *w)
1098 {
1099 void ddct(int n, int isgn, double *a, int *ip, double *w);
1100 void ddst(int n, int isgn, double *a, int *ip, double *w);
1101 int i, j, k;
1102
1103 for (i = 0; i < n1; i++) {
1104 if (ics == 0) {
1105 for (j = 0; j < n2; j++) {
1106 ddct(n3, isgn, a[i][j], ip, w);
1107 }
1108 } else {
1109 for (j = 0; j < n2; j++) {
1110 ddst(n3, isgn, a[i][j], ip, w);
1111 }
1112 }
1113 if (n3 > 2) {
1114 for (k = 0; k < n3; k += 4) {
1115 for (j = 0; j < n2; j++) {
1116 t[j] = a[i][j][k];
1117 t[n2 + j] = a[i][j][k + 1];
1118 t[2 * n2 + j] = a[i][j][k + 2];
1119 t[3 * n2 + j] = a[i][j][k + 3];
1120 }
1121 if (ics == 0) {
1122 ddct(n2, isgn, t, ip, w);
1123 ddct(n2, isgn, &t[n2], ip, w);
1124 ddct(n2, isgn, &t[2 * n2], ip, w);
1125 ddct(n2, isgn, &t[3 * n2], ip, w);
1126 } else {
1127 ddst(n2, isgn, t, ip, w);
1128 ddst(n2, isgn, &t[n2], ip, w);
1129 ddst(n2, isgn, &t[2 * n2], ip, w);
1130 ddst(n2, isgn, &t[3 * n2], ip, w);
1131 }
1132 for (j = 0; j < n2; j++) {
1133 a[i][j][k] = t[j];
1134 a[i][j][k + 1] = t[n2 + j];
1135 a[i][j][k + 2] = t[2 * n2 + j];
1136 a[i][j][k + 3] = t[3 * n2 + j];
1137 }
1138 }
1139 } else if (n3 == 2) {
1140 for (j = 0; j < n2; j++) {
1141 t[j] = a[i][j][0];
1142 t[n2 + j] = a[i][j][1];
1143 }
1144 if (ics == 0) {
1145 ddct(n2, isgn, t, ip, w);
1146 ddct(n2, isgn, &t[n2], ip, w);
1147 } else {
1148 ddst(n2, isgn, t, ip, w);
1149 ddst(n2, isgn, &t[n2], ip, w);
1150 }
1151 for (j = 0; j < n2; j++) {
1152 a[i][j][0] = t[j];
1153 a[i][j][1] = t[n2 + j];
1154 }
1155 }
1156 }
1157 }
1158
1159
ddxt3db_sub(int n1,int n2,int n3,int ics,int isgn,double *** a,double * t,int * ip,double * w)1160 void ddxt3db_sub(int n1, int n2, int n3, int ics, int isgn,
1161 double ***a, double *t, int *ip, double *w)
1162 {
1163 void ddct(int n, int isgn, double *a, int *ip, double *w);
1164 void ddst(int n, int isgn, double *a, int *ip, double *w);
1165 int i, j, k;
1166
1167 if (n3 > 2) {
1168 for (j = 0; j < n2; j++) {
1169 for (k = 0; k < n3; k += 4) {
1170 for (i = 0; i < n1; i++) {
1171 t[i] = a[i][j][k];
1172 t[n1 + i] = a[i][j][k + 1];
1173 t[2 * n1 + i] = a[i][j][k + 2];
1174 t[3 * n1 + i] = a[i][j][k + 3];
1175 }
1176 if (ics == 0) {
1177 ddct(n1, isgn, t, ip, w);
1178 ddct(n1, isgn, &t[n1], ip, w);
1179 ddct(n1, isgn, &t[2 * n1], ip, w);
1180 ddct(n1, isgn, &t[3 * n1], ip, w);
1181 } else {
1182 ddst(n1, isgn, t, ip, w);
1183 ddst(n1, isgn, &t[n1], ip, w);
1184 ddst(n1, isgn, &t[2 * n1], ip, w);
1185 ddst(n1, isgn, &t[3 * n1], ip, w);
1186 }
1187 for (i = 0; i < n1; i++) {
1188 a[i][j][k] = t[i];
1189 a[i][j][k + 1] = t[n1 + i];
1190 a[i][j][k + 2] = t[2 * n1 + i];
1191 a[i][j][k + 3] = t[3 * n1 + i];
1192 }
1193 }
1194 }
1195 } else if (n3 == 2) {
1196 for (j = 0; j < n2; j++) {
1197 for (i = 0; i < n1; i++) {
1198 t[i] = a[i][j][0];
1199 t[n1 + i] = a[i][j][1];
1200 }
1201 if (ics == 0) {
1202 ddct(n1, isgn, t, ip, w);
1203 ddct(n1, isgn, &t[n1], ip, w);
1204 } else {
1205 ddst(n1, isgn, t, ip, w);
1206 ddst(n1, isgn, &t[n1], ip, w);
1207 }
1208 for (i = 0; i < n1; i++) {
1209 a[i][j][0] = t[i];
1210 a[i][j][1] = t[n1 + i];
1211 }
1212 }
1213 }
1214 }
1215
1216
1217 #ifdef USE_FFT3D_THREADS
1218 struct fft3d_arg_st {
1219 int nthread;
1220 int n0;
1221 int n1;
1222 int n2;
1223 int n3;
1224 int ic;
1225 int isgn;
1226 double ***a;
1227 double *t;
1228 int *ip;
1229 double *w;
1230 };
1231 typedef struct fft3d_arg_st fft3d_arg_t;
1232
1233
xdft3da_subth(int n1,int n2,int n3,int icr,int isgn,double *** a,double * t,int * ip,double * w)1234 void xdft3da_subth(int n1, int n2, int n3, int icr, int isgn,
1235 double ***a, double *t, int *ip, double *w)
1236 {
1237 void *xdft3da_th(void *p);
1238 fft3d_thread_t th[FFT3D_MAX_THREADS];
1239 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1240 int nthread, nt, i;
1241
1242 nthread = FFT3D_MAX_THREADS;
1243 if (nthread > n1) {
1244 nthread = n1;
1245 }
1246 nt = 8 * n2;
1247 if (n3 == 4) {
1248 nt >>= 1;
1249 } else if (n3 < 4) {
1250 nt >>= 2;
1251 }
1252 for (i = 0; i < nthread; i++) {
1253 ag[i].nthread = nthread;
1254 ag[i].n0 = i;
1255 ag[i].n1 = n1;
1256 ag[i].n2 = n2;
1257 ag[i].n3 = n3;
1258 ag[i].ic = icr;
1259 ag[i].isgn = isgn;
1260 ag[i].a = a;
1261 ag[i].t = &t[nt * i];
1262 ag[i].ip = ip;
1263 ag[i].w = w;
1264 fft3d_thread_create(&th[i], xdft3da_th, &ag[i]);
1265 }
1266 for (i = 0; i < nthread; i++) {
1267 fft3d_thread_wait(th[i]);
1268 }
1269 }
1270
1271
cdft3db_subth(int n1,int n2,int n3,int isgn,double *** a,double * t,int * ip,double * w)1272 void cdft3db_subth(int n1, int n2, int n3, int isgn, double ***a,
1273 double *t, int *ip, double *w)
1274 {
1275 void *cdft3db_th(void *p);
1276 fft3d_thread_t th[FFT3D_MAX_THREADS];
1277 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1278 int nthread, nt, i;
1279
1280 nthread = FFT3D_MAX_THREADS;
1281 if (nthread > n2) {
1282 nthread = n2;
1283 }
1284 nt = 8 * n1;
1285 if (n3 == 4) {
1286 nt >>= 1;
1287 } else if (n3 < 4) {
1288 nt >>= 2;
1289 }
1290 for (i = 0; i < nthread; i++) {
1291 ag[i].nthread = nthread;
1292 ag[i].n0 = i;
1293 ag[i].n1 = n1;
1294 ag[i].n2 = n2;
1295 ag[i].n3 = n3;
1296 ag[i].isgn = isgn;
1297 ag[i].a = a;
1298 ag[i].t = &t[nt * i];
1299 ag[i].ip = ip;
1300 ag[i].w = w;
1301 fft3d_thread_create(&th[i], cdft3db_th, &ag[i]);
1302 }
1303 for (i = 0; i < nthread; i++) {
1304 fft3d_thread_wait(th[i]);
1305 }
1306 }
1307
1308
ddxt3da_subth(int n1,int n2,int n3,int ics,int isgn,double *** a,double * t,int * ip,double * w)1309 void ddxt3da_subth(int n1, int n2, int n3, int ics, int isgn,
1310 double ***a, double *t, int *ip, double *w)
1311 {
1312 void *ddxt3da_th(void *p);
1313 fft3d_thread_t th[FFT3D_MAX_THREADS];
1314 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1315 int nthread, nt, i;
1316
1317 nthread = FFT3D_MAX_THREADS;
1318 if (nthread > n1) {
1319 nthread = n1;
1320 }
1321 nt = 4 * n2;
1322 if (n3 == 2) {
1323 nt >>= 1;
1324 }
1325 for (i = 0; i < nthread; i++) {
1326 ag[i].nthread = nthread;
1327 ag[i].n0 = i;
1328 ag[i].n1 = n1;
1329 ag[i].n2 = n2;
1330 ag[i].n3 = n3;
1331 ag[i].ic = ics;
1332 ag[i].isgn = isgn;
1333 ag[i].a = a;
1334 ag[i].t = &t[nt * i];
1335 ag[i].ip = ip;
1336 ag[i].w = w;
1337 fft3d_thread_create(&th[i], ddxt3da_th, &ag[i]);
1338 }
1339 for (i = 0; i < nthread; i++) {
1340 fft3d_thread_wait(th[i]);
1341 }
1342 }
1343
1344
ddxt3db_subth(int n1,int n2,int n3,int ics,int isgn,double *** a,double * t,int * ip,double * w)1345 void ddxt3db_subth(int n1, int n2, int n3, int ics, int isgn,
1346 double ***a, double *t, int *ip, double *w)
1347 {
1348 void *ddxt3db_th(void *p);
1349 fft3d_thread_t th[FFT3D_MAX_THREADS];
1350 fft3d_arg_t ag[FFT3D_MAX_THREADS];
1351 int nthread, nt, i;
1352
1353 nthread = FFT3D_MAX_THREADS;
1354 if (nthread > n2) {
1355 nthread = n2;
1356 }
1357 nt = 4 * n1;
1358 if (n3 == 2) {
1359 nt >>= 1;
1360 }
1361 for (i = 0; i < nthread; i++) {
1362 ag[i].nthread = nthread;
1363 ag[i].n0 = i;
1364 ag[i].n1 = n1;
1365 ag[i].n2 = n2;
1366 ag[i].n3 = n3;
1367 ag[i].ic = ics;
1368 ag[i].isgn = isgn;
1369 ag[i].a = a;
1370 ag[i].t = &t[nt * i];
1371 ag[i].ip = ip;
1372 ag[i].w = w;
1373 fft3d_thread_create(&th[i], ddxt3db_th, &ag[i]);
1374 }
1375 for (i = 0; i < nthread; i++) {
1376 fft3d_thread_wait(th[i]);
1377 }
1378 }
1379
1380
xdft3da_th(void * p)1381 void *xdft3da_th(void *p)
1382 {
1383 void cdft(int n, int isgn, double *a, int *ip, double *w);
1384 void rdft(int n, int isgn, double *a, int *ip, double *w);
1385 int nthread, n0, n1, n2, n3, icr, isgn, *ip, i, j, k;
1386 double ***a, *t, *w;
1387
1388 nthread = ((fft3d_arg_t *) p)->nthread;
1389 n0 = ((fft3d_arg_t *) p)->n0;
1390 n1 = ((fft3d_arg_t *) p)->n1;
1391 n2 = ((fft3d_arg_t *) p)->n2;
1392 n3 = ((fft3d_arg_t *) p)->n3;
1393 icr = ((fft3d_arg_t *) p)->ic;
1394 isgn = ((fft3d_arg_t *) p)->isgn;
1395 a = ((fft3d_arg_t *) p)->a;
1396 t = ((fft3d_arg_t *) p)->t;
1397 ip = ((fft3d_arg_t *) p)->ip;
1398 w = ((fft3d_arg_t *) p)->w;
1399 for (i = n0; i < n1; i += nthread) {
1400 if (icr == 0) {
1401 for (j = 0; j < n2; j++) {
1402 cdft(n3, isgn, a[i][j], ip, w);
1403 }
1404 } else if (isgn >= 0) {
1405 for (j = 0; j < n2; j++) {
1406 rdft(n3, isgn, a[i][j], ip, w);
1407 }
1408 }
1409 if (n3 > 4) {
1410 for (k = 0; k < n3; k += 8) {
1411 for (j = 0; j < n2; j++) {
1412 t[2 * j] = a[i][j][k];
1413 t[2 * j + 1] = a[i][j][k + 1];
1414 t[2 * n2 + 2 * j] = a[i][j][k + 2];
1415 t[2 * n2 + 2 * j + 1] = a[i][j][k + 3];
1416 t[4 * n2 + 2 * j] = a[i][j][k + 4];
1417 t[4 * n2 + 2 * j + 1] = a[i][j][k + 5];
1418 t[6 * n2 + 2 * j] = a[i][j][k + 6];
1419 t[6 * n2 + 2 * j + 1] = a[i][j][k + 7];
1420 }
1421 cdft(2 * n2, isgn, t, ip, w);
1422 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1423 cdft(2 * n2, isgn, &t[4 * n2], ip, w);
1424 cdft(2 * n2, isgn, &t[6 * n2], ip, w);
1425 for (j = 0; j < n2; j++) {
1426 a[i][j][k] = t[2 * j];
1427 a[i][j][k + 1] = t[2 * j + 1];
1428 a[i][j][k + 2] = t[2 * n2 + 2 * j];
1429 a[i][j][k + 3] = t[2 * n2 + 2 * j + 1];
1430 a[i][j][k + 4] = t[4 * n2 + 2 * j];
1431 a[i][j][k + 5] = t[4 * n2 + 2 * j + 1];
1432 a[i][j][k + 6] = t[6 * n2 + 2 * j];
1433 a[i][j][k + 7] = t[6 * n2 + 2 * j + 1];
1434 }
1435 }
1436 } else if (n3 == 4) {
1437 for (j = 0; j < n2; j++) {
1438 t[2 * j] = a[i][j][0];
1439 t[2 * j + 1] = a[i][j][1];
1440 t[2 * n2 + 2 * j] = a[i][j][2];
1441 t[2 * n2 + 2 * j + 1] = a[i][j][3];
1442 }
1443 cdft(2 * n2, isgn, t, ip, w);
1444 cdft(2 * n2, isgn, &t[2 * n2], ip, w);
1445 for (j = 0; j < n2; j++) {
1446 a[i][j][0] = t[2 * j];
1447 a[i][j][1] = t[2 * j + 1];
1448 a[i][j][2] = t[2 * n2 + 2 * j];
1449 a[i][j][3] = t[2 * n2 + 2 * j + 1];
1450 }
1451 } else if (n3 == 2) {
1452 for (j = 0; j < n2; j++) {
1453 t[2 * j] = a[i][j][0];
1454 t[2 * j + 1] = a[i][j][1];
1455 }
1456 cdft(2 * n2, isgn, t, ip, w);
1457 for (j = 0; j < n2; j++) {
1458 a[i][j][0] = t[2 * j];
1459 a[i][j][1] = t[2 * j + 1];
1460 }
1461 }
1462 if (icr != 0 && isgn < 0) {
1463 for (j = 0; j < n2; j++) {
1464 rdft(n3, isgn, a[i][j], ip, w);
1465 }
1466 }
1467 }
1468 return (void *) 0;
1469 }
1470
1471
cdft3db_th(void * p)1472 void *cdft3db_th(void *p)
1473 {
1474 void cdft(int n, int isgn, double *a, int *ip, double *w);
1475 int nthread, n0, n1, n2, n3, isgn, *ip, i, j, k;
1476 double ***a, *t, *w;
1477
1478 nthread = ((fft3d_arg_t *) p)->nthread;
1479 n0 = ((fft3d_arg_t *) p)->n0;
1480 n1 = ((fft3d_arg_t *) p)->n1;
1481 n2 = ((fft3d_arg_t *) p)->n2;
1482 n3 = ((fft3d_arg_t *) p)->n3;
1483 isgn = ((fft3d_arg_t *) p)->isgn;
1484 a = ((fft3d_arg_t *) p)->a;
1485 t = ((fft3d_arg_t *) p)->t;
1486 ip = ((fft3d_arg_t *) p)->ip;
1487 w = ((fft3d_arg_t *) p)->w;
1488 if (n3 > 4) {
1489 for (j = n0; j < n2; j += nthread) {
1490 for (k = 0; k < n3; k += 8) {
1491 for (i = 0; i < n1; i++) {
1492 t[2 * i] = a[i][j][k];
1493 t[2 * i + 1] = a[i][j][k + 1];
1494 t[2 * n1 + 2 * i] = a[i][j][k + 2];
1495 t[2 * n1 + 2 * i + 1] = a[i][j][k + 3];
1496 t[4 * n1 + 2 * i] = a[i][j][k + 4];
1497 t[4 * n1 + 2 * i + 1] = a[i][j][k + 5];
1498 t[6 * n1 + 2 * i] = a[i][j][k + 6];
1499 t[6 * n1 + 2 * i + 1] = a[i][j][k + 7];
1500 }
1501 cdft(2 * n1, isgn, t, ip, w);
1502 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1503 cdft(2 * n1, isgn, &t[4 * n1], ip, w);
1504 cdft(2 * n1, isgn, &t[6 * n1], ip, w);
1505 for (i = 0; i < n1; i++) {
1506 a[i][j][k] = t[2 * i];
1507 a[i][j][k + 1] = t[2 * i + 1];
1508 a[i][j][k + 2] = t[2 * n1 + 2 * i];
1509 a[i][j][k + 3] = t[2 * n1 + 2 * i + 1];
1510 a[i][j][k + 4] = t[4 * n1 + 2 * i];
1511 a[i][j][k + 5] = t[4 * n1 + 2 * i + 1];
1512 a[i][j][k + 6] = t[6 * n1 + 2 * i];
1513 a[i][j][k + 7] = t[6 * n1 + 2 * i + 1];
1514 }
1515 }
1516 }
1517 } else if (n3 == 4) {
1518 for (j = n0; j < n2; j += nthread) {
1519 for (i = 0; i < n1; i++) {
1520 t[2 * i] = a[i][j][0];
1521 t[2 * i + 1] = a[i][j][1];
1522 t[2 * n1 + 2 * i] = a[i][j][2];
1523 t[2 * n1 + 2 * i + 1] = a[i][j][3];
1524 }
1525 cdft(2 * n1, isgn, t, ip, w);
1526 cdft(2 * n1, isgn, &t[2 * n1], ip, w);
1527 for (i = 0; i < n1; i++) {
1528 a[i][j][0] = t[2 * i];
1529 a[i][j][1] = t[2 * i + 1];
1530 a[i][j][2] = t[2 * n1 + 2 * i];
1531 a[i][j][3] = t[2 * n1 + 2 * i + 1];
1532 }
1533 }
1534 } else if (n3 == 2) {
1535 for (j = n0; j < n2; j += nthread) {
1536 for (i = 0; i < n1; i++) {
1537 t[2 * i] = a[i][j][0];
1538 t[2 * i + 1] = a[i][j][1];
1539 }
1540 cdft(2 * n1, isgn, t, ip, w);
1541 for (i = 0; i < n1; i++) {
1542 a[i][j][0] = t[2 * i];
1543 a[i][j][1] = t[2 * i + 1];
1544 }
1545 }
1546 }
1547 return (void *) 0;
1548 }
1549
1550
ddxt3da_th(void * p)1551 void *ddxt3da_th(void *p)
1552 {
1553 void ddct(int n, int isgn, double *a, int *ip, double *w);
1554 void ddst(int n, int isgn, double *a, int *ip, double *w);
1555 int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1556 double ***a, *t, *w;
1557
1558 nthread = ((fft3d_arg_t *) p)->nthread;
1559 n0 = ((fft3d_arg_t *) p)->n0;
1560 n1 = ((fft3d_arg_t *) p)->n1;
1561 n2 = ((fft3d_arg_t *) p)->n2;
1562 n3 = ((fft3d_arg_t *) p)->n3;
1563 ics = ((fft3d_arg_t *) p)->ic;
1564 isgn = ((fft3d_arg_t *) p)->isgn;
1565 a = ((fft3d_arg_t *) p)->a;
1566 t = ((fft3d_arg_t *) p)->t;
1567 ip = ((fft3d_arg_t *) p)->ip;
1568 w = ((fft3d_arg_t *) p)->w;
1569 for (i = n0; i < n1; i += nthread) {
1570 if (ics == 0) {
1571 for (j = 0; j < n2; j++) {
1572 ddct(n3, isgn, a[i][j], ip, w);
1573 }
1574 } else {
1575 for (j = 0; j < n2; j++) {
1576 ddst(n3, isgn, a[i][j], ip, w);
1577 }
1578 }
1579 if (n3 > 2) {
1580 for (k = 0; k < n3; k += 4) {
1581 for (j = 0; j < n2; j++) {
1582 t[j] = a[i][j][k];
1583 t[n2 + j] = a[i][j][k + 1];
1584 t[2 * n2 + j] = a[i][j][k + 2];
1585 t[3 * n2 + j] = a[i][j][k + 3];
1586 }
1587 if (ics == 0) {
1588 ddct(n2, isgn, t, ip, w);
1589 ddct(n2, isgn, &t[n2], ip, w);
1590 ddct(n2, isgn, &t[2 * n2], ip, w);
1591 ddct(n2, isgn, &t[3 * n2], ip, w);
1592 } else {
1593 ddst(n2, isgn, t, ip, w);
1594 ddst(n2, isgn, &t[n2], ip, w);
1595 ddst(n2, isgn, &t[2 * n2], ip, w);
1596 ddst(n2, isgn, &t[3 * n2], ip, w);
1597 }
1598 for (j = 0; j < n2; j++) {
1599 a[i][j][k] = t[j];
1600 a[i][j][k + 1] = t[n2 + j];
1601 a[i][j][k + 2] = t[2 * n2 + j];
1602 a[i][j][k + 3] = t[3 * n2 + j];
1603 }
1604 }
1605 } else if (n3 == 2) {
1606 for (j = 0; j < n2; j++) {
1607 t[j] = a[i][j][0];
1608 t[n2 + j] = a[i][j][1];
1609 }
1610 if (ics == 0) {
1611 ddct(n2, isgn, t, ip, w);
1612 ddct(n2, isgn, &t[n2], ip, w);
1613 } else {
1614 ddst(n2, isgn, t, ip, w);
1615 ddst(n2, isgn, &t[n2], ip, w);
1616 }
1617 for (j = 0; j < n2; j++) {
1618 a[i][j][0] = t[j];
1619 a[i][j][1] = t[n2 + j];
1620 }
1621 }
1622 }
1623 return (void *) 0;
1624 }
1625
1626
ddxt3db_th(void * p)1627 void *ddxt3db_th(void *p)
1628 {
1629 void ddct(int n, int isgn, double *a, int *ip, double *w);
1630 void ddst(int n, int isgn, double *a, int *ip, double *w);
1631 int nthread, n0, n1, n2, n3, ics, isgn, *ip, i, j, k;
1632 double ***a, *t, *w;
1633
1634 nthread = ((fft3d_arg_t *) p)->nthread;
1635 n0 = ((fft3d_arg_t *) p)->n0;
1636 n1 = ((fft3d_arg_t *) p)->n1;
1637 n2 = ((fft3d_arg_t *) p)->n2;
1638 n3 = ((fft3d_arg_t *) p)->n3;
1639 ics = ((fft3d_arg_t *) p)->ic;
1640 isgn = ((fft3d_arg_t *) p)->isgn;
1641 a = ((fft3d_arg_t *) p)->a;
1642 t = ((fft3d_arg_t *) p)->t;
1643 ip = ((fft3d_arg_t *) p)->ip;
1644 w = ((fft3d_arg_t *) p)->w;
1645 if (n3 > 2) {
1646 for (j = n0; j < n2; j += nthread) {
1647 for (k = 0; k < n3; k += 4) {
1648 for (i = 0; i < n1; i++) {
1649 t[i] = a[i][j][k];
1650 t[n1 + i] = a[i][j][k + 1];
1651 t[2 * n1 + i] = a[i][j][k + 2];
1652 t[3 * n1 + i] = a[i][j][k + 3];
1653 }
1654 if (ics == 0) {
1655 ddct(n1, isgn, t, ip, w);
1656 ddct(n1, isgn, &t[n1], ip, w);
1657 ddct(n1, isgn, &t[2 * n1], ip, w);
1658 ddct(n1, isgn, &t[3 * n1], ip, w);
1659 } else {
1660 ddst(n1, isgn, t, ip, w);
1661 ddst(n1, isgn, &t[n1], ip, w);
1662 ddst(n1, isgn, &t[2 * n1], ip, w);
1663 ddst(n1, isgn, &t[3 * n1], ip, w);
1664 }
1665 for (i = 0; i < n1; i++) {
1666 a[i][j][k] = t[i];
1667 a[i][j][k + 1] = t[n1 + i];
1668 a[i][j][k + 2] = t[2 * n1 + i];
1669 a[i][j][k + 3] = t[3 * n1 + i];
1670 }
1671 }
1672 }
1673 } else if (n3 == 2) {
1674 for (j = n0; j < n2; j += nthread) {
1675 for (i = 0; i < n1; i++) {
1676 t[i] = a[i][j][0];
1677 t[n1 + i] = a[i][j][1];
1678 }
1679 if (ics == 0) {
1680 ddct(n1, isgn, t, ip, w);
1681 ddct(n1, isgn, &t[n1], ip, w);
1682 } else {
1683 ddst(n1, isgn, t, ip, w);
1684 ddst(n1, isgn, &t[n1], ip, w);
1685 }
1686 for (i = 0; i < n1; i++) {
1687 a[i][j][0] = t[i];
1688 a[i][j][1] = t[n1 + i];
1689 }
1690 }
1691 }
1692 return (void *) 0;
1693 }
1694 #endif /* USE_FFT3D_THREADS */
1695
1696