1 /****************************************************************************** 2 * * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 #include <stdlib.h> 21 #include <stdio.h> 22 23 #include "ixheaacd_type_def.h" 24 #include "ixheaacd_interface.h" 25 #include "ixheaacd_constants.h" 26 #include "ixheaacd_basic_ops32.h" 27 #include "ixheaacd_basic_ops40.h" 28 #include "ixheaacd_function_selector.h" 29 30 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514]; 31 extern const WORD32 ixheaacd_twiddle_table_3pr[1155]; 32 extern const WORD32 ixheaacd_twiddle_table_3pi[1155]; 33 extern const WORD8 ixheaacd_mps_dig_rev[16]; 34 35 #define PLATFORM_INLINE __inline 36 37 #define DIG_REV(i, m, j) \ 38 do { \ 39 unsigned _ = (i); \ 40 _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \ 41 _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \ 42 _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \ 43 (j) = _ >> (m); \ 44 } while (0) 45 46 static PLATFORM_INLINE WORD32 ixheaacd_mult32_sat(WORD32 a, WORD32 b) { 47 WORD32 result; 48 WORD64 temp_result; 49 50 temp_result = (WORD64)a * (WORD64)b; 51 result = ixheaacd_sat64_32(temp_result >> 31); 52 53 return (result); 54 } 55 56 static PLATFORM_INLINE WORD32 ixheaacd_mac32_sat(WORD32 a, WORD32 b, WORD32 c) { 57 WORD32 result; 58 59 result = ixheaacd_add32_sat(a, ixheaacd_mult32_sat(b, c)); 60 61 return (result); 62 } 63 64 65 VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, 66 WORD32 *fin_im, WORD32 nlength) { 67 WORD32 i, j, k, n_stages; 68 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 69 WORD32 del, nodespacing, in_loop_cnt; 70 WORD32 y[128]; 71 WORD32 npoints = nlength; 72 WORD32 *ptr_y = y; 73 const WORD32 *ptr_w; 74 n_stages = 30 - ixheaacd_norm32(npoints); 75 76 n_stages = n_stages >> 1; 77 78 ptr_w = ixheaacd_twiddle_table_fft_32x32; 79 80 for (i = 0; i < npoints; i += 4) { 81 WORD32 *inp = ptr_x; 82 h2 = ixheaacd_mps_dig_rev[i >> 2]; 83 inp += (h2); 84 85 x0r = *inp; 86 x0i = *(inp + 1); 87 inp += (npoints >> 1); 88 89 x1r = *inp; 90 x1i = *(inp + 1); 91 inp += (npoints >> 1); 92 93 x2r = *inp; 94 x2i = *(inp + 1); 95 inp += (npoints >> 1); 96 97 x3r = *inp; 98 x3i = *(inp + 1); 99 100 x0r = ixheaacd_add32_sat(x0r, x2r); 101 x0i = ixheaacd_add32_sat(x0i, x2i); 102 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 103 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 104 x1r = ixheaacd_add32_sat(x1r, x3r); 105 x1i = ixheaacd_add32_sat(x1i, x3i); 106 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 107 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 108 109 x0r = ixheaacd_add32_sat(x0r, x1r); 110 x0i = ixheaacd_add32_sat(x0i, x1i); 111 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 112 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 113 x2r = ixheaacd_add32_sat(x2r, x3i); 114 x2i = ixheaacd_sub32_sat(x2i, x3r); 115 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 116 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 117 118 *ptr_y++ = x0r; 119 *ptr_y++ = x0i; 120 *ptr_y++ = x2r; 121 *ptr_y++ = x2i; 122 *ptr_y++ = x1r; 123 *ptr_y++ = x1i; 124 *ptr_y++ = x3i; 125 *ptr_y++ = x3r; 126 } 127 ptr_y -= 2 * npoints; 128 del = 4; 129 nodespacing = 64; 130 in_loop_cnt = npoints >> 4; 131 for (i = n_stages - 1; i > 0; i--) { 132 const WORD32 *twiddles = ptr_w; 133 WORD32 *data = ptr_y; 134 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 135 WORD32 sec_loop_cnt; 136 137 for (k = in_loop_cnt; k != 0; k--) { 138 x0r = (*data); 139 x0i = (*(data + 1)); 140 data += (del << 1); 141 142 x1r = (*data); 143 x1i = (*(data + 1)); 144 data += (del << 1); 145 146 x2r = (*data); 147 x2i = (*(data + 1)); 148 data += (del << 1); 149 150 x3r = (*data); 151 x3i = (*(data + 1)); 152 data -= 3 * (del << 1); 153 154 x0r = ixheaacd_add32_sat(x0r, x2r); 155 x0i = ixheaacd_add32_sat(x0i, x2i); 156 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 157 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 158 x1r = ixheaacd_add32_sat(x1r, x3r); 159 x1i = ixheaacd_add32_sat(x1i, x3i); 160 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 161 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 162 163 x0r = ixheaacd_add32_sat(x0r, x1r); 164 x0i = ixheaacd_add32_sat(x0i, x1i); 165 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 166 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 167 x2r = ixheaacd_add32_sat(x2r, x3i); 168 x2i = ixheaacd_sub32_sat(x2i, x3r); 169 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 170 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 171 172 *data = x0r; 173 *(data + 1) = x0i; 174 data += (del << 1); 175 176 *data = x2r; 177 *(data + 1) = x2i; 178 data += (del << 1); 179 180 *data = x1r; 181 *(data + 1) = x1i; 182 data += (del << 1); 183 184 *data = x3i; 185 *(data + 1) = x3r; 186 data += (del << 1); 187 } 188 data = ptr_y + 2; 189 190 sec_loop_cnt = (nodespacing * del); 191 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 192 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 193 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 194 (sec_loop_cnt / 256); 195 j = nodespacing; 196 197 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 198 w1h = *(twiddles + 2 * j); 199 w1l = *(twiddles + 2 * j + 1); 200 w2h = *(twiddles + 2 * (j << 1)); 201 w2l = *(twiddles + 2 * (j << 1) + 1); 202 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 203 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 204 205 for (k = in_loop_cnt; k != 0; k--) { 206 WORD32 tmp; 207 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 208 209 data += (del << 1); 210 211 x1r = *data; 212 x1i = *(data + 1); 213 data += (del << 1); 214 215 x2r = *data; 216 x2i = *(data + 1); 217 data += (del << 1); 218 219 x3r = *data; 220 x3i = *(data + 1); 221 data -= 3 * (del << 1); 222 223 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 224 ixheaacd_mult32_sat(x1i, w1h)); 225 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 226 x1r = tmp; 227 228 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), 229 ixheaacd_mult32_sat(x2i, w2h)); 230 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 231 x2r = tmp; 232 233 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l), 234 ixheaacd_mult32_sat(x3i, w3h)); 235 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 236 x3r = tmp; 237 238 x0r = (*data); 239 x0i = (*(data + 1)); 240 241 x0r = ixheaacd_add32_sat(x0r, x2r); 242 x0i = ixheaacd_add32_sat(x0i, x2i); 243 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 244 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 245 x1r = ixheaacd_add32_sat(x1r, x3r); 246 x1i = ixheaacd_add32_sat(x1i, x3i); 247 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 248 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 249 250 x0r = ixheaacd_add32_sat(x0r, x1r); 251 x0i = ixheaacd_add32_sat(x0i, x1i); 252 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 253 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 254 x2r = ixheaacd_add32_sat(x2r, x3i); 255 x2i = ixheaacd_sub32_sat(x2i, x3r); 256 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 257 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 258 259 *data = x0r; 260 *(data + 1) = x0i; 261 data += (del << 1); 262 263 *data = x2r; 264 *(data + 1) = x2i; 265 data += (del << 1); 266 267 *data = x1r; 268 *(data + 1) = x1i; 269 data += (del << 1); 270 271 *data = x3i; 272 *(data + 1) = x3r; 273 data += (del << 1); 274 } 275 data -= 2 * npoints; 276 data += 2; 277 } 278 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 279 w1h = *(twiddles + 2 * j); 280 w2h = *(twiddles + 2 * (j << 1)); 281 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 282 w1l = *(twiddles + 2 * j + 1); 283 w2l = *(twiddles + 2 * (j << 1) + 1); 284 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 285 286 for (k = in_loop_cnt; k != 0; k--) { 287 WORD32 tmp; 288 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 289 290 data += (del << 1); 291 292 x1r = *data; 293 x1i = *(data + 1); 294 data += (del << 1); 295 296 x2r = *data; 297 x2i = *(data + 1); 298 data += (del << 1); 299 300 x3r = *data; 301 x3i = *(data + 1); 302 data -= 3 * (del << 1); 303 304 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 305 ixheaacd_mult32_sat(x1i, w1h)); 306 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 307 x1r = tmp; 308 309 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), 310 ixheaacd_mult32_sat(x2i, w2h)); 311 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 312 x2r = tmp; 313 314 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), 315 ixheaacd_mult32_sat(x3i, w3l)); 316 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 317 ixheaacd_mult32_sat(x3r, w3l)); 318 x3r = tmp; 319 320 x0r = (*data); 321 x0i = (*(data + 1)); 322 323 x0r = ixheaacd_add32_sat(x0r, x2r); 324 x0i = ixheaacd_add32_sat(x0i, x2i); 325 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 326 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 327 x1r = ixheaacd_add32_sat(x1r, x3r); 328 x1i = ixheaacd_add32_sat(x1i, x3i); 329 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 330 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 331 332 x0r = ixheaacd_add32_sat(x0r, x1r); 333 x0i = ixheaacd_add32_sat(x0i, x1i); 334 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 335 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 336 x2r = ixheaacd_add32_sat(x2r, x3i); 337 x2i = ixheaacd_sub32_sat(x2i, x3r); 338 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 339 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 340 341 *data = x0r; 342 *(data + 1) = x0i; 343 data += (del << 1); 344 345 *data = x2r; 346 *(data + 1) = x2i; 347 data += (del << 1); 348 349 *data = x1r; 350 *(data + 1) = x1i; 351 data += (del << 1); 352 353 *data = x3i; 354 *(data + 1) = x3r; 355 data += (del << 1); 356 } 357 data -= 2 * npoints; 358 data += 2; 359 } 360 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 361 w1h = *(twiddles + 2 * j); 362 w2h = *(twiddles + 2 * (j << 1) - 512); 363 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 364 w1l = *(twiddles + 2 * j + 1); 365 w2l = *(twiddles + 2 * (j << 1) - 511); 366 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 367 368 for (k = in_loop_cnt; k != 0; k--) { 369 WORD32 tmp; 370 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 371 372 data += (del << 1); 373 374 x1r = *data; 375 x1i = *(data + 1); 376 data += (del << 1); 377 378 x2r = *data; 379 x2i = *(data + 1); 380 data += (del << 1); 381 382 x3r = *data; 383 x3i = *(data + 1); 384 data -= 3 * (del << 1); 385 386 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 387 ixheaacd_mult32_sat(x1i, w1h)); 388 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 389 x1r = tmp; 390 391 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), 392 ixheaacd_mult32_sat(x2i, w2l)); 393 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), 394 ixheaacd_mult32_sat(x2r, w2l)); 395 x2r = tmp; 396 397 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), 398 ixheaacd_mult32_sat(x3i, w3l)); 399 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 400 ixheaacd_mult32_sat(x3r, w3l)); 401 x3r = tmp; 402 403 x0r = (*data); 404 x0i = (*(data + 1)); 405 406 x0r = ixheaacd_add32_sat(x0r, x2r); 407 x0i = ixheaacd_add32_sat(x0i, x2i); 408 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 409 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 410 x1r = ixheaacd_add32_sat(x1r, x3r); 411 x1i = ixheaacd_add32_sat(x1i, x3i); 412 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 413 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 414 415 x0r = ixheaacd_add32_sat(x0r, x1r); 416 x0i = ixheaacd_add32_sat(x0i, x1i); 417 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 418 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 419 x2r = ixheaacd_add32_sat(x2r, x3i); 420 x2i = ixheaacd_sub32_sat(x2i, x3r); 421 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 422 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 423 424 *data = x0r; 425 *(data + 1) = x0i; 426 data += (del << 1); 427 428 *data = x2r; 429 *(data + 1) = x2i; 430 data += (del << 1); 431 432 *data = x1r; 433 *(data + 1) = x1i; 434 data += (del << 1); 435 436 *data = x3i; 437 *(data + 1) = x3r; 438 data += (del << 1); 439 } 440 data -= 2 * npoints; 441 data += 2; 442 } 443 for (; j < nodespacing * del; j += nodespacing) { 444 w1h = *(twiddles + 2 * j); 445 w2h = *(twiddles + 2 * (j << 1) - 512); 446 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 447 w1l = *(twiddles + 2 * j + 1); 448 w2l = *(twiddles + 2 * (j << 1) - 511); 449 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 450 451 for (k = in_loop_cnt; k != 0; k--) { 452 WORD32 tmp; 453 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 454 455 data += (del << 1); 456 457 x1r = *data; 458 x1i = *(data + 1); 459 data += (del << 1); 460 461 x2r = *data; 462 x2i = *(data + 1); 463 data += (del << 1); 464 465 x3r = *data; 466 x3i = *(data + 1); 467 data -= 3 * (del << 1); 468 469 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 470 ixheaacd_mult32_sat(x1i, w1h)); 471 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 472 x1r = tmp; 473 474 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), 475 ixheaacd_mult32_sat(x2i, w2l)); 476 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), 477 ixheaacd_mult32_sat(x2r, w2l)); 478 x2r = tmp; 479 480 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 481 ixheaacd_mult32_sat(x3r, w3l)); 482 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 483 x3r = tmp; 484 485 x0r = (*data); 486 x0i = (*(data + 1)); 487 488 x0r = ixheaacd_add32_sat(x0r, x2r); 489 x0i = ixheaacd_add32_sat(x0i, x2i); 490 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 491 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 492 x1r = ixheaacd_add32_sat(x1r, x3r); 493 x1i = ixheaacd_sub32_sat(x1i, x3i); 494 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 495 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 496 497 x0r = ixheaacd_add32_sat(x0r, x1r); 498 x0i = ixheaacd_add32_sat(x0i, x1i); 499 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 500 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 501 x2r = ixheaacd_add32_sat(x2r, x3i); 502 x2i = ixheaacd_sub32_sat(x2i, x3r); 503 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 504 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 505 506 *data = x0r; 507 *(data + 1) = x0i; 508 data += (del << 1); 509 510 *data = x2r; 511 *(data + 1) = x2i; 512 data += (del << 1); 513 514 *data = x1r; 515 *(data + 1) = x1i; 516 data += (del << 1); 517 518 *data = x3i; 519 *(data + 1) = x3r; 520 data += (del << 1); 521 } 522 data -= 2 * npoints; 523 data += 2; 524 } 525 nodespacing >>= 2; 526 del <<= 2; 527 in_loop_cnt >>= 2; 528 } 529 530 for (i = 0; i < 2 * nlength; i += 2) { 531 fin_re[i] = y[i]; 532 fin_im[i] = y[i + 1]; 533 } 534 535 return; 536 } 537 538 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength, 539 WORD32 fft_mode, WORD32 *preshift) { 540 WORD32 i, j, k, n_stages; 541 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 542 WORD32 del, nodespacing, in_loop_cnt; 543 WORD32 not_power_4; 544 WORD32 npts, shift; 545 WORD32 dig_rev_shift; 546 WORD32 ptr_x[1024]; 547 WORD32 y[1024]; 548 WORD32 npoints = nlength; 549 WORD32 n = 0; 550 WORD32 *ptr_y = y; 551 const WORD32 *ptr_w; 552 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16; 553 n_stages = 30 - ixheaacd_norm32(npoints); 554 not_power_4 = n_stages & 1; 555 556 n_stages = n_stages >> 1; 557 558 npts = npoints; 559 while (npts >> 1) { 560 n++; 561 npts = npts >> 1; 562 } 563 564 if (n % 2 == 0) 565 shift = ((n + 4)) / 2; 566 else 567 shift = ((n + 3) / 2); 568 569 for (i = 0; i < nlength; i++) { 570 ptr_x[2 * i] = (xr[i] / (1 << (shift))); 571 ptr_x[2 * i + 1] = (xi[i] / (1 << (shift))); 572 } 573 574 if (fft_mode == -1) { 575 ptr_w = ixheaacd_twiddle_table_fft_32x32; 576 577 for (i = 0; i < npoints; i += 4) { 578 WORD32 *inp = ptr_x; 579 580 DIG_REV(i, dig_rev_shift, h2); 581 if (not_power_4) { 582 h2 += 1; 583 h2 &= ~1; 584 } 585 inp += (h2); 586 587 x0r = *inp; 588 x0i = *(inp + 1); 589 inp += (npoints >> 1); 590 591 x1r = *inp; 592 x1i = *(inp + 1); 593 inp += (npoints >> 1); 594 595 x2r = *inp; 596 x2i = *(inp + 1); 597 inp += (npoints >> 1); 598 599 x3r = *inp; 600 x3i = *(inp + 1); 601 602 x0r = ixheaacd_add32_sat(x0r, x2r); 603 x0i = ixheaacd_add32_sat(x0i, x2i); 604 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 605 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 606 x1r = ixheaacd_add32_sat(x1r, x3r); 607 x1i = ixheaacd_add32_sat(x1i, x3i); 608 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 609 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 610 611 x0r = ixheaacd_add32_sat(x0r, x1r); 612 x0i = ixheaacd_add32_sat(x0i, x1i); 613 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 614 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 615 x2r = ixheaacd_add32_sat(x2r, x3i); 616 x2i = ixheaacd_sub32_sat(x2i, x3r); 617 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 618 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 619 620 *ptr_y++ = x0r; 621 *ptr_y++ = x0i; 622 *ptr_y++ = x2r; 623 *ptr_y++ = x2i; 624 *ptr_y++ = x1r; 625 *ptr_y++ = x1i; 626 *ptr_y++ = x3i; 627 *ptr_y++ = x3r; 628 } 629 ptr_y -= 2 * npoints; 630 del = 4; 631 nodespacing = 64; 632 in_loop_cnt = npoints >> 4; 633 for (i = n_stages - 1; i > 0; i--) { 634 const WORD32 *twiddles = ptr_w; 635 WORD32 *data = ptr_y; 636 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 637 WORD32 sec_loop_cnt; 638 639 for (k = in_loop_cnt; k != 0; k--) { 640 x0r = (*data); 641 x0i = (*(data + 1)); 642 data += (del << 1); 643 644 x1r = (*data); 645 x1i = (*(data + 1)); 646 data += (del << 1); 647 648 x2r = (*data); 649 x2i = (*(data + 1)); 650 data += (del << 1); 651 652 x3r = (*data); 653 x3i = (*(data + 1)); 654 data -= 3 * (del << 1); 655 656 x0r = ixheaacd_add32_sat(x0r, x2r); 657 x0i = ixheaacd_add32_sat(x0i, x2i); 658 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 659 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 660 x1r = ixheaacd_add32_sat(x1r, x3r); 661 x1i = ixheaacd_add32_sat(x1i, x3i); 662 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 663 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 664 665 x0r = ixheaacd_add32_sat(x0r, x1r); 666 x0i = ixheaacd_add32_sat(x0i, x1i); 667 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 668 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 669 x2r = ixheaacd_add32_sat(x2r, x3i); 670 x2i = ixheaacd_sub32_sat(x2i, x3r); 671 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 672 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 673 674 *data = x0r; 675 *(data + 1) = x0i; 676 data += (del << 1); 677 678 *data = x2r; 679 *(data + 1) = x2i; 680 data += (del << 1); 681 682 *data = x1r; 683 *(data + 1) = x1i; 684 data += (del << 1); 685 686 *data = x3i; 687 *(data + 1) = x3r; 688 data += (del << 1); 689 } 690 data = ptr_y + 2; 691 692 sec_loop_cnt = (nodespacing * del); 693 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 694 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 695 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 696 (sec_loop_cnt / 256); 697 j = nodespacing; 698 699 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 700 w1h = *(twiddles + 2 * j); 701 w1l = *(twiddles + 2 * j + 1); 702 w2h = *(twiddles + 2 * (j << 1)); 703 w2l = *(twiddles + 2 * (j << 1) + 1); 704 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 705 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 706 707 for (k = in_loop_cnt; k != 0; k--) { 708 WORD32 tmp; 709 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 710 711 data += (del << 1); 712 713 x1r = *data; 714 x1i = *(data + 1); 715 data += (del << 1); 716 717 x2r = *data; 718 x2i = *(data + 1); 719 data += (del << 1); 720 721 x3r = *data; 722 x3i = *(data + 1); 723 data -= 3 * (del << 1); 724 725 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 726 ixheaacd_mult32_sat(x1i, w1h)); 727 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 728 x1r = tmp; 729 730 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), 731 ixheaacd_mult32_sat(x2i, w2h)); 732 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 733 x2r = tmp; 734 735 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3l), 736 ixheaacd_mult32_sat(x3i, w3h)); 737 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 738 x3r = tmp; 739 740 x0r = (*data); 741 x0i = (*(data + 1)); 742 743 x0r = ixheaacd_add32_sat(x0r, x2r); 744 x0i = ixheaacd_add32_sat(x0i, x2i); 745 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 746 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 747 x1r = ixheaacd_add32_sat(x1r, x3r); 748 x1i = ixheaacd_add32_sat(x1i, x3i); 749 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 750 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 751 752 x0r = ixheaacd_add32_sat(x0r, x1r); 753 x0i = ixheaacd_add32_sat(x0i, x1i); 754 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 755 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 756 x2r = ixheaacd_add32_sat(x2r, x3i); 757 x2i = ixheaacd_sub32_sat(x2i, x3r); 758 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 759 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 760 761 *data = x0r; 762 *(data + 1) = x0i; 763 data += (del << 1); 764 765 *data = x2r; 766 *(data + 1) = x2i; 767 data += (del << 1); 768 769 *data = x1r; 770 *(data + 1) = x1i; 771 data += (del << 1); 772 773 *data = x3i; 774 *(data + 1) = x3r; 775 data += (del << 1); 776 } 777 data -= 2 * npoints; 778 data += 2; 779 } 780 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 781 w1h = *(twiddles + 2 * j); 782 w2h = *(twiddles + 2 * (j << 1)); 783 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 784 w1l = *(twiddles + 2 * j + 1); 785 w2l = *(twiddles + 2 * (j << 1) + 1); 786 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 787 788 for (k = in_loop_cnt; k != 0; k--) { 789 WORD32 tmp; 790 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 791 data += (del << 1); 792 793 x1r = *data; 794 x1i = *(data + 1); 795 data += (del << 1); 796 797 x2r = *data; 798 x2i = *(data + 1); 799 data += (del << 1); 800 801 x3r = *data; 802 x3i = *(data + 1); 803 data -= 3 * (del << 1); 804 805 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 806 ixheaacd_mult32_sat(x1i, w1h)); 807 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 808 x1r = tmp; 809 810 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2l), 811 ixheaacd_mult32_sat(x2i, w2h)); 812 x2i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 813 x2r = tmp; 814 815 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), 816 ixheaacd_mult32_sat(x3i, w3l)); 817 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 818 ixheaacd_mult32_sat(x3r, w3l)); 819 x3r = tmp; 820 821 x0r = (*data); 822 x0i = (*(data + 1)); 823 824 x0r = ixheaacd_add32_sat(x0r, x2r); 825 x0i = ixheaacd_add32_sat(x0i, x2i); 826 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 827 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 828 x1r = ixheaacd_add32_sat(x1r, x3r); 829 x1i = ixheaacd_add32_sat(x1i, x3i); 830 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 831 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 832 833 x0r = ixheaacd_add32_sat(x0r, x1r); 834 x0i = ixheaacd_add32_sat(x0i, x1i); 835 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 836 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 837 x2r = ixheaacd_add32_sat(x2r, x3i); 838 x2i = ixheaacd_sub32_sat(x2i, x3r); 839 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 840 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 841 842 *data = x0r; 843 *(data + 1) = x0i; 844 data += (del << 1); 845 846 *data = x2r; 847 *(data + 1) = x2i; 848 data += (del << 1); 849 850 *data = x1r; 851 *(data + 1) = x1i; 852 data += (del << 1); 853 854 *data = x3i; 855 *(data + 1) = x3r; 856 data += (del << 1); 857 } 858 data -= 2 * npoints; 859 data += 2; 860 } 861 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 862 w1h = *(twiddles + 2 * j); 863 w2h = *(twiddles + 2 * (j << 1) - 512); 864 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 865 w1l = *(twiddles + 2 * j + 1); 866 w2l = *(twiddles + 2 * (j << 1) - 511); 867 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 868 869 for (k = in_loop_cnt; k != 0; k--) { 870 WORD32 tmp; 871 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 872 873 data += (del << 1); 874 875 x1r = *data; 876 x1i = *(data + 1); 877 data += (del << 1); 878 879 x2r = *data; 880 x2i = *(data + 1); 881 data += (del << 1); 882 883 x3r = *data; 884 x3i = *(data + 1); 885 data -= 3 * (del << 1); 886 887 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 888 ixheaacd_mult32_sat(x1i, w1h)); 889 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 890 x1r = tmp; 891 892 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), 893 ixheaacd_mult32_sat(x2i, w2l)); 894 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), 895 ixheaacd_mult32_sat(x2r, w2l)); 896 x2r = tmp; 897 898 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3h), 899 ixheaacd_mult32_sat(x3i, w3l)); 900 x3i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 901 ixheaacd_mult32_sat(x3r, w3l)); 902 x3r = tmp; 903 904 x0r = (*data); 905 x0i = (*(data + 1)); 906 907 x0r = ixheaacd_add32_sat(x0r, x2r); 908 x0i = ixheaacd_add32_sat(x0i, x2i); 909 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 910 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 911 x1r = ixheaacd_add32_sat(x1r, x3r); 912 x1i = ixheaacd_add32_sat(x1i, x3i); 913 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 914 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 915 916 x0r = ixheaacd_add32_sat(x0r, x1r); 917 x0i = ixheaacd_add32_sat(x0i, x1i); 918 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 919 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 920 x2r = ixheaacd_add32_sat(x2r, x3i); 921 x2i = ixheaacd_sub32_sat(x2i, x3r); 922 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 923 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 924 925 *data = x0r; 926 *(data + 1) = x0i; 927 data += (del << 1); 928 929 *data = x2r; 930 *(data + 1) = x2i; 931 data += (del << 1); 932 933 *data = x1r; 934 *(data + 1) = x1i; 935 data += (del << 1); 936 937 *data = x3i; 938 *(data + 1) = x3r; 939 data += (del << 1); 940 } 941 data -= 2 * npoints; 942 data += 2; 943 } 944 for (; j < nodespacing * del; j += nodespacing) { 945 w1h = *(twiddles + 2 * j); 946 w2h = *(twiddles + 2 * (j << 1) - 512); 947 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 948 w1l = *(twiddles + 2 * j + 1); 949 w2l = *(twiddles + 2 * (j << 1) - 511); 950 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 951 952 for (k = in_loop_cnt; k != 0; k--) { 953 WORD32 tmp; 954 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 955 956 data += (del << 1); 957 958 x1r = *data; 959 x1i = *(data + 1); 960 data += (del << 1); 961 962 x2r = *data; 963 x2i = *(data + 1); 964 data += (del << 1); 965 966 x3r = *data; 967 x3i = *(data + 1); 968 data -= 3 * (del << 1); 969 970 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 971 ixheaacd_mult32_sat(x1i, w1h)); 972 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 973 x1r = tmp; 974 975 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2h), 976 ixheaacd_mult32_sat(x2i, w2l)); 977 x2i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2i, w2h), 978 ixheaacd_mult32_sat(x2r, w2l)); 979 x2r = tmp; 980 981 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3i, w3h), 982 ixheaacd_mult32_sat(x3r, w3l)); 983 x3i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 984 x3r = tmp; 985 986 x0r = (*data); 987 x0i = (*(data + 1)); 988 989 x0r = ixheaacd_add32_sat(x0r, x2r); 990 x0i = ixheaacd_add32_sat(x0i, x2i); 991 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 992 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 993 x1r = ixheaacd_add32_sat(x1r, x3r); 994 x1i = ixheaacd_sub32_sat(x1i, x3i); 995 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 996 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 997 998 x0r = ixheaacd_add32_sat(x0r, x1r); 999 x0i = ixheaacd_add32_sat(x0i, x1i); 1000 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1001 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1002 x2r = ixheaacd_add32_sat(x2r, x3i); 1003 x2i = ixheaacd_sub32_sat(x2i, x3r); 1004 x3i = ixheaacd_sub32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1005 x3r = ixheaacd_add32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1006 1007 *data = x0r; 1008 *(data + 1) = x0i; 1009 data += (del << 1); 1010 1011 *data = x2r; 1012 *(data + 1) = x2i; 1013 data += (del << 1); 1014 1015 *data = x1r; 1016 *(data + 1) = x1i; 1017 data += (del << 1); 1018 1019 *data = x3i; 1020 *(data + 1) = x3r; 1021 data += (del << 1); 1022 } 1023 data -= 2 * npoints; 1024 data += 2; 1025 } 1026 nodespacing >>= 2; 1027 del <<= 2; 1028 in_loop_cnt >>= 2; 1029 } 1030 if (not_power_4) { 1031 const WORD32 *twiddles = ptr_w; 1032 nodespacing <<= 1; 1033 shift += 1; 1034 1035 for (j = del / 2; j != 0; j--) { 1036 WORD32 w1h = *twiddles; 1037 WORD32 w1l = *(twiddles + 1); 1038 WORD32 tmp; 1039 twiddles += nodespacing * 2; 1040 1041 x0r = *ptr_y; 1042 x0i = *(ptr_y + 1); 1043 ptr_y += (del << 1); 1044 1045 x1r = *ptr_y; 1046 x1i = *(ptr_y + 1); 1047 1048 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1l), 1049 ixheaacd_mult32_sat(x1i, w1h)); 1050 x1i = ixheaacd_mac32_sat(ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1051 x1r = tmp; 1052 1053 *ptr_y = (x0r) / 2 - (x1r) / 2; 1054 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1055 ptr_y -= (del << 1); 1056 1057 *ptr_y = (x0r) / 2 + (x1r) / 2; 1058 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1059 ptr_y += 2; 1060 } 1061 twiddles = ptr_w; 1062 for (j = del / 2; j != 0; j--) { 1063 WORD32 w1h = *twiddles; 1064 WORD32 w1l = *(twiddles + 1); 1065 WORD32 tmp; 1066 twiddles += nodespacing * 2; 1067 1068 x0r = *ptr_y; 1069 x0i = *(ptr_y + 1); 1070 ptr_y += (del << 1); 1071 1072 x1r = *ptr_y; 1073 x1i = *(ptr_y + 1); 1074 1075 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1h), 1076 ixheaacd_mult32_sat(x1i, w1l)); 1077 x1i = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1i, w1h), 1078 ixheaacd_mult32_sat(x1r, w1l)); 1079 x1r = tmp; 1080 1081 *ptr_y = (x0r) / 2 - (x1r) / 2; 1082 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1083 ptr_y -= (del << 1); 1084 1085 *ptr_y = (x0r) / 2 + (x1r) / 2; 1086 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1087 ptr_y += 2; 1088 } 1089 } 1090 1091 } 1092 1093 else { 1094 ptr_w = ixheaacd_twiddle_table_fft_32x32; 1095 1096 for (i = 0; i < npoints; i += 4) { 1097 WORD32 *inp = ptr_x; 1098 1099 DIG_REV(i, dig_rev_shift, h2); 1100 if (not_power_4) { 1101 h2 += 1; 1102 h2 &= ~1; 1103 } 1104 inp += (h2); 1105 1106 x0r = *inp; 1107 x0i = *(inp + 1); 1108 inp += (npoints >> 1); 1109 1110 x1r = *inp; 1111 x1i = *(inp + 1); 1112 inp += (npoints >> 1); 1113 1114 x2r = *inp; 1115 x2i = *(inp + 1); 1116 inp += (npoints >> 1); 1117 1118 x3r = *inp; 1119 x3i = *(inp + 1); 1120 1121 x0r = ixheaacd_add32_sat(x0r, x2r); 1122 x0i = ixheaacd_add32_sat(x0i, x2i); 1123 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1124 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1125 x1r = ixheaacd_add32_sat(x1r, x3r); 1126 x1i = ixheaacd_add32_sat(x1i, x3i); 1127 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1128 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1129 1130 x0r = ixheaacd_add32_sat(x0r, x1r); 1131 x0i = ixheaacd_add32_sat(x0i, x1i); 1132 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1133 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1134 x2r = ixheaacd_sub32_sat(x2r, x3i); 1135 x2i = ixheaacd_add32_sat(x2i, x3r); 1136 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1137 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1138 1139 *ptr_y++ = x0r; 1140 *ptr_y++ = x0i; 1141 *ptr_y++ = x2r; 1142 *ptr_y++ = x2i; 1143 *ptr_y++ = x1r; 1144 *ptr_y++ = x1i; 1145 *ptr_y++ = x3i; 1146 *ptr_y++ = x3r; 1147 } 1148 ptr_y -= 2 * npoints; 1149 del = 4; 1150 nodespacing = 64; 1151 in_loop_cnt = npoints >> 4; 1152 for (i = n_stages - 1; i > 0; i--) { 1153 const WORD32 *twiddles = ptr_w; 1154 WORD32 *data = ptr_y; 1155 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 1156 WORD32 sec_loop_cnt; 1157 1158 for (k = in_loop_cnt; k != 0; k--) { 1159 x0r = (*data); 1160 x0i = (*(data + 1)); 1161 data += (del << 1); 1162 1163 x1r = (*data); 1164 x1i = (*(data + 1)); 1165 data += (del << 1); 1166 1167 x2r = (*data); 1168 x2i = (*(data + 1)); 1169 data += (del << 1); 1170 1171 x3r = (*data); 1172 x3i = (*(data + 1)); 1173 data -= 3 * (del << 1); 1174 1175 x0r = ixheaacd_add32_sat(x0r, x2r); 1176 x0i = ixheaacd_add32_sat(x0i, x2i); 1177 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1178 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1179 x1r = ixheaacd_add32_sat(x1r, x3r); 1180 x1i = ixheaacd_add32_sat(x1i, x3i); 1181 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1182 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1183 1184 x0r = ixheaacd_add32_sat(x0r, x1r); 1185 x0i = ixheaacd_add32_sat(x0i, x1i); 1186 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1187 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1188 x2r = ixheaacd_sub32_sat(x2r, x3i); 1189 x2i = ixheaacd_add32_sat(x2i, x3r); 1190 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1191 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1192 1193 *data = x0r; 1194 *(data + 1) = x0i; 1195 data += (del << 1); 1196 1197 *data = x2r; 1198 *(data + 1) = x2i; 1199 data += (del << 1); 1200 1201 *data = x1r; 1202 *(data + 1) = x1i; 1203 data += (del << 1); 1204 1205 *data = x3i; 1206 *(data + 1) = x3r; 1207 data += (del << 1); 1208 } 1209 data = ptr_y + 2; 1210 1211 sec_loop_cnt = (nodespacing * del); 1212 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 1213 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 1214 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 1215 (sec_loop_cnt / 256); 1216 j = nodespacing; 1217 1218 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 1219 w1h = *(twiddles + 2 * j); 1220 w2h = *(twiddles + 2 * (j << 1)); 1221 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 1222 w1l = *(twiddles + 2 * j + 1); 1223 w2l = *(twiddles + 2 * (j << 1) + 1); 1224 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 1225 1226 for (k = in_loop_cnt; k != 0; k--) { 1227 WORD32 tmp; 1228 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1229 1230 data += (del << 1); 1231 1232 x1r = *data; 1233 x1i = *(data + 1); 1234 data += (del << 1); 1235 1236 x2r = *data; 1237 x2i = *(data + 1); 1238 data += (del << 1); 1239 1240 x3r = *data; 1241 x3i = *(data + 1); 1242 data -= 3 * (del << 1); 1243 1244 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1245 ixheaacd_mult32_sat(x1i, w1h)); 1246 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1247 x1r = tmp; 1248 1249 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l), 1250 ixheaacd_mult32_sat(x2i, w2h)); 1251 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 1252 x2r = tmp; 1253 1254 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l), 1255 ixheaacd_mult32_sat(x3i, w3h)); 1256 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 1257 x3r = tmp; 1258 1259 x0r = (*data); 1260 x0i = (*(data + 1)); 1261 1262 x0r = ixheaacd_add32_sat(x0r, x2r); 1263 x0i = ixheaacd_add32_sat(x0i, x2i); 1264 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1265 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1266 x1r = ixheaacd_add32_sat(x1r, x3r); 1267 x1i = ixheaacd_add32_sat(x1i, x3i); 1268 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1269 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1270 1271 x0r = ixheaacd_add32_sat(x0r, x1r); 1272 x0i = ixheaacd_add32_sat(x0i, x1i); 1273 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1274 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1275 x2r = ixheaacd_sub32_sat(x2r, x3i); 1276 x2i = ixheaacd_add32_sat(x2i, x3r); 1277 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1278 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1279 1280 *data = x0r; 1281 *(data + 1) = x0i; 1282 data += (del << 1); 1283 1284 *data = x2r; 1285 *(data + 1) = x2i; 1286 data += (del << 1); 1287 1288 *data = x1r; 1289 *(data + 1) = x1i; 1290 data += (del << 1); 1291 1292 *data = x3i; 1293 *(data + 1) = x3r; 1294 data += (del << 1); 1295 } 1296 data -= 2 * npoints; 1297 data += 2; 1298 } 1299 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 1300 w1h = *(twiddles + 2 * j); 1301 w2h = *(twiddles + 2 * (j << 1)); 1302 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 1303 w1l = *(twiddles + 2 * j + 1); 1304 w2l = *(twiddles + 2 * (j << 1) + 1); 1305 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 1306 1307 for (k = in_loop_cnt; k != 0; k--) { 1308 WORD32 tmp; 1309 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1310 1311 data += (del << 1); 1312 1313 x1r = *data; 1314 x1i = *(data + 1); 1315 data += (del << 1); 1316 1317 x2r = *data; 1318 x2i = *(data + 1); 1319 data += (del << 1); 1320 1321 x3r = *data; 1322 x3i = *(data + 1); 1323 data -= 3 * (del << 1); 1324 1325 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1326 ixheaacd_mult32_sat(x1i, w1h)); 1327 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1328 x1r = tmp; 1329 1330 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l), 1331 ixheaacd_mult32_sat(x2i, w2h)); 1332 x2i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x2r, w2h), x2i, w2l); 1333 x2r = tmp; 1334 1335 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h), 1336 ixheaacd_mult32_sat(x3i, w3l)); 1337 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l), 1338 ixheaacd_mult32_sat(x3i, w3h)); 1339 x3r = tmp; 1340 1341 x0r = (*data); 1342 x0i = (*(data + 1)); 1343 1344 x0r = ixheaacd_add32_sat(x0r, x2r); 1345 x0i = ixheaacd_add32_sat(x0i, x2i); 1346 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1347 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1348 x1r = ixheaacd_add32_sat(x1r, x3r); 1349 x1i = ixheaacd_add32_sat(x1i, x3i); 1350 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1351 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1352 1353 x0r = ixheaacd_add32_sat(x0r, x1r); 1354 x0i = ixheaacd_add32_sat(x0i, x1i); 1355 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1356 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1357 x2r = ixheaacd_sub32_sat(x2r, x3i); 1358 x2i = ixheaacd_add32_sat(x2i, x3r); 1359 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1360 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1361 1362 *data = x0r; 1363 *(data + 1) = x0i; 1364 data += (del << 1); 1365 1366 *data = x2r; 1367 *(data + 1) = x2i; 1368 data += (del << 1); 1369 1370 *data = x1r; 1371 *(data + 1) = x1i; 1372 data += (del << 1); 1373 1374 *data = x3i; 1375 *(data + 1) = x3r; 1376 data += (del << 1); 1377 } 1378 data -= 2 * npoints; 1379 data += 2; 1380 } 1381 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 1382 w1h = *(twiddles + 2 * j); 1383 w2h = *(twiddles + 2 * (j << 1) - 512); 1384 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 1385 w1l = *(twiddles + 2 * j + 1); 1386 w2l = *(twiddles + 2 * (j << 1) - 511); 1387 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 1388 1389 for (k = in_loop_cnt; k != 0; k--) { 1390 WORD32 tmp; 1391 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1392 1393 data += (del << 1); 1394 1395 x1r = *data; 1396 x1i = *(data + 1); 1397 data += (del << 1); 1398 1399 x2r = *data; 1400 x2i = *(data + 1); 1401 data += (del << 1); 1402 1403 x3r = *data; 1404 x3i = *(data + 1); 1405 data -= 3 * (del << 1); 1406 1407 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1408 ixheaacd_mult32_sat(x1i, w1h)); 1409 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1410 x1r = tmp; 1411 1412 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h), 1413 ixheaacd_mult32_sat(x2i, w2l)); 1414 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l), 1415 ixheaacd_mult32_sat(x2i, w2h)); 1416 x2r = tmp; 1417 1418 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x3r, w3h), 1419 ixheaacd_mult32_sat(x3i, w3l)); 1420 x3i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l), 1421 ixheaacd_mult32_sat(x3i, w3h)); 1422 x3r = tmp; 1423 1424 x0r = (*data); 1425 x0i = (*(data + 1)); 1426 1427 x0r = ixheaacd_add32_sat(x0r, x2r); 1428 x0i = ixheaacd_add32_sat(x0i, x2i); 1429 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1430 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1431 x1r = ixheaacd_add32_sat(x1r, x3r); 1432 x1i = ixheaacd_add32_sat(x1i, x3i); 1433 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1434 x3i = ixheaacd_sub32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1435 1436 x0r = ixheaacd_add32_sat(x0r, x1r); 1437 x0i = ixheaacd_add32_sat(x0i, x1i); 1438 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1439 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1440 x2r = ixheaacd_sub32_sat(x2r, x3i); 1441 x2i = ixheaacd_add32_sat(x2i, x3r); 1442 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1443 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1444 1445 *data = x0r; 1446 *(data + 1) = x0i; 1447 data += (del << 1); 1448 1449 *data = x2r; 1450 *(data + 1) = x2i; 1451 data += (del << 1); 1452 1453 *data = x1r; 1454 *(data + 1) = x1i; 1455 data += (del << 1); 1456 1457 *data = x3i; 1458 *(data + 1) = x3r; 1459 data += (del << 1); 1460 } 1461 data -= 2 * npoints; 1462 data += 2; 1463 } 1464 for (; j < nodespacing * del; j += nodespacing) { 1465 w1h = *(twiddles + 2 * j); 1466 w2h = *(twiddles + 2 * (j << 1) - 512); 1467 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 1468 w1l = *(twiddles + 2 * j + 1); 1469 w2l = *(twiddles + 2 * (j << 1) - 511); 1470 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 1471 1472 for (k = in_loop_cnt; k != 0; k--) { 1473 WORD32 tmp; 1474 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1475 1476 data += (del << 1); 1477 1478 x1r = *data; 1479 x1i = *(data + 1); 1480 data += (del << 1); 1481 1482 x2r = *data; 1483 x2i = *(data + 1); 1484 data += (del << 1); 1485 1486 x3r = *data; 1487 x3i = *(data + 1); 1488 data -= 3 * (del << 1); 1489 1490 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1491 ixheaacd_mult32_sat(x1i, w1h)); 1492 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1493 x1r = tmp; 1494 1495 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x2r, w2h), 1496 ixheaacd_mult32_sat(x2i, w2l)); 1497 x2i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x2r, w2l), 1498 ixheaacd_mult32_sat(x2i, w2h)); 1499 x2r = tmp; 1500 1501 tmp = -ixheaacd_add32_sat(ixheaacd_mult32_sat(x3r, w3l), 1502 ixheaacd_mult32_sat(x3i, w3h)); 1503 x3i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x3r, w3h), x3i, w3l); 1504 x3r = tmp; 1505 1506 x0r = (*data); 1507 x0i = (*(data + 1)); 1508 1509 x0r = ixheaacd_add32_sat(x0r, x2r); 1510 x0i = ixheaacd_add32_sat(x0i, x2i); 1511 x2r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x2r, 1)); 1512 x2i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x2i, 1)); 1513 x1r = ixheaacd_add32_sat(x1r, x3r); 1514 x1i = ixheaacd_sub32_sat(x1i, x3i); 1515 x3r = ixheaacd_sub32_sat(x1r, ixheaacd_shl32_sat(x3r, 1)); 1516 x3i = ixheaacd_add32_sat(x1i, ixheaacd_shl32_sat(x3i, 1)); 1517 1518 x0r = ixheaacd_add32_sat(x0r, x1r); 1519 x0i = ixheaacd_add32_sat(x0i, x1i); 1520 x1r = ixheaacd_sub32_sat(x0r, ixheaacd_shl32_sat(x1r, 1)); 1521 x1i = ixheaacd_sub32_sat(x0i, ixheaacd_shl32_sat(x1i, 1)); 1522 x2r = ixheaacd_sub32_sat(x2r, x3i); 1523 x2i = ixheaacd_add32_sat(x2i, x3r); 1524 x3i = ixheaacd_add32_sat(x2r, ixheaacd_shl32_sat(x3i, 1)); 1525 x3r = ixheaacd_sub32_sat(x2i, ixheaacd_shl32_sat(x3r, 1)); 1526 1527 *data = x0r; 1528 *(data + 1) = x0i; 1529 data += (del << 1); 1530 1531 *data = x2r; 1532 *(data + 1) = x2i; 1533 data += (del << 1); 1534 1535 *data = x1r; 1536 *(data + 1) = x1i; 1537 data += (del << 1); 1538 1539 *data = x3i; 1540 *(data + 1) = x3r; 1541 data += (del << 1); 1542 } 1543 data -= 2 * npoints; 1544 data += 2; 1545 } 1546 nodespacing >>= 2; 1547 del <<= 2; 1548 in_loop_cnt >>= 2; 1549 } 1550 if (not_power_4) { 1551 const WORD32 *twiddles = ptr_w; 1552 nodespacing <<= 1; 1553 shift += 1; 1554 for (j = del / 2; j != 0; j--) { 1555 WORD32 w1h = *twiddles; 1556 WORD32 w1l = *(twiddles + 1); 1557 1558 WORD32 tmp; 1559 twiddles += nodespacing * 2; 1560 1561 x0r = *ptr_y; 1562 x0i = *(ptr_y + 1); 1563 ptr_y += (del << 1); 1564 1565 x1r = *ptr_y; 1566 x1i = *(ptr_y + 1); 1567 1568 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1569 ixheaacd_mult32_sat(x1i, w1h)); 1570 x1i = ixheaacd_mac32_sat(-ixheaacd_mult32_sat(x1r, w1h), x1i, w1l); 1571 x1r = tmp; 1572 1573 *ptr_y = (x0r) / 2 - (x1r) / 2; 1574 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1575 ptr_y -= (del << 1); 1576 1577 *ptr_y = (x0r) / 2 + (x1r) / 2; 1578 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1579 ptr_y += 2; 1580 } 1581 twiddles = ptr_w; 1582 for (j = del / 2; j != 0; j--) { 1583 WORD32 w1h = *twiddles; 1584 WORD32 w1l = *(twiddles + 1); 1585 WORD32 tmp; 1586 twiddles += nodespacing * 2; 1587 1588 x0r = *ptr_y; 1589 x0i = *(ptr_y + 1); 1590 ptr_y += (del << 1); 1591 1592 x1r = *ptr_y; 1593 x1i = *(ptr_y + 1); 1594 1595 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(x1r, w1h), 1596 ixheaacd_mult32_sat(x1i, w1l)); 1597 x1i = ixheaacd_add32_sat(ixheaacd_mult32_sat(x1r, w1l), 1598 ixheaacd_mult32_sat(x1i, w1h)); 1599 x1r = tmp; 1600 1601 *ptr_y = (x0r) / 2 - (x1r) / 2; 1602 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1603 ptr_y -= (del << 1); 1604 1605 *ptr_y = (x0r) / 2 + (x1r) / 2; 1606 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1607 ptr_y += 2; 1608 } 1609 } 1610 } 1611 1612 for (i = 0; i < nlength; i++) { 1613 xr[i] = y[2 * i]; 1614 xi[i] = y[2 * i + 1]; 1615 } 1616 1617 *preshift = shift - *preshift; 1618 return; 1619 } 1620 1621 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op, 1622 WORD32 sign_dir) { 1623 WORD32 add_r, sub_r; 1624 WORD32 add_i, sub_i; 1625 WORD32 temp_real, temp_imag, temp; 1626 1627 WORD32 p1, p2, p3, p4; 1628 1629 WORD32 sinmu; 1630 sinmu = -1859775393 * sign_dir; 1631 1632 temp_real = ixheaacd_add32_sat(inp[0], inp[2]); 1633 temp_imag = ixheaacd_add32_sat(inp[1], inp[3]); 1634 1635 add_r = ixheaacd_add32_sat(inp[2], inp[4]); 1636 add_i = ixheaacd_add32_sat(inp[3], inp[5]); 1637 1638 sub_r = ixheaacd_sub32_sat(inp[2], inp[4]); 1639 sub_i = ixheaacd_sub32_sat(inp[3], inp[5]); 1640 1641 p1 = add_r >> 1; 1642 p4 = add_i >> 1; 1643 p2 = ixheaacd_mult32_shl(sub_i, sinmu); 1644 p3 = ixheaacd_mult32_shl(sub_r, sinmu); 1645 1646 temp = ixheaacd_sub32(inp[0], p1); 1647 1648 op[0] = ixheaacd_add32_sat(temp_real, inp[4]); 1649 op[1] = ixheaacd_add32_sat(temp_imag, inp[5]); 1650 op[2] = ixheaacd_add32_sat(temp, p2); 1651 op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4); 1652 op[4] = ixheaacd_sub32_sat(temp, p2); 1653 op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4); 1654 1655 return; 1656 } 1657 1658 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength, 1659 WORD32 fft_mode, WORD32 *preshift) { 1660 WORD32 i, j; 1661 WORD32 shift = 0; 1662 WORD32 xr_3[384]; 1663 WORD32 xi_3[384]; 1664 WORD32 x[1024]; 1665 WORD32 y[1024]; 1666 WORD32 cnfac, npts; 1667 WORD32 mpass = nlength; 1668 WORD32 n = 0; 1669 WORD32 *ptr_x = x; 1670 WORD32 *ptr_y = y; 1671 1672 cnfac = 0; 1673 while (mpass % 3 == 0) { 1674 mpass /= 3; 1675 cnfac++; 1676 } 1677 npts = mpass; 1678 1679 for (i = 0; i < 3 * cnfac; i++) { 1680 for (j = 0; j < mpass; j++) { 1681 xr_3[j] = xr[3 * j + i]; 1682 xi_3[j] = xi[3 * j + i]; 1683 } 1684 1685 (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift); 1686 1687 for (j = 0; j < mpass; j++) { 1688 xr[3 * j + i] = xr_3[j]; 1689 xi[3 * j + i] = xi_3[j]; 1690 } 1691 } 1692 1693 while (npts >> 1) { 1694 n++; 1695 npts = npts >> 1; 1696 } 1697 1698 if (n % 2 == 0) 1699 shift = ((n + 4)) / 2; 1700 else 1701 shift = ((n + 5) / 2); 1702 1703 *preshift = shift - *preshift + 1; 1704 1705 for (i = 0; i < nlength; i++) { 1706 ptr_x[2 * i] = (xr[i] >> 1); 1707 ptr_x[2 * i + 1] = (xi[i] >> 1); 1708 } 1709 1710 { 1711 const WORD32 *w1r, *w1i; 1712 WORD32 tmp; 1713 w1r = ixheaacd_twiddle_table_3pr; 1714 w1i = ixheaacd_twiddle_table_3pi; 1715 1716 if (fft_mode < 0) { 1717 for (i = 0; i < nlength; i += 3) { 1718 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)), 1719 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i))); 1720 ptr_x[2 * i + 1] = 1721 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i)), 1722 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r))); 1723 ptr_x[2 * i] = tmp; 1724 1725 w1r++; 1726 w1i++; 1727 1728 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)), 1729 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i))); 1730 ptr_x[2 * i + 3] = 1731 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i)), 1732 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r))); 1733 ptr_x[2 * i + 2] = tmp; 1734 1735 w1r++; 1736 w1i++; 1737 1738 tmp = ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)), 1739 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i))); 1740 ptr_x[2 * i + 5] = 1741 ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i)), 1742 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r))); 1743 ptr_x[2 * i + 4] = tmp; 1744 1745 w1r += 3 * (128 / mpass - 1) + 1; 1746 w1i += 3 * (128 / mpass - 1) + 1; 1747 } 1748 } 1749 1750 else { 1751 for (i = 0; i < nlength; i += 3) { 1752 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i], (*w1r)), 1753 ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1i))); 1754 ptr_x[2 * i + 1] = 1755 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 1], (*w1r)), 1756 ixheaacd_mult32_sat(ptr_x[2 * i], (*w1i))); 1757 ptr_x[2 * i] = tmp; 1758 1759 w1r++; 1760 w1i++; 1761 1762 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1r)), 1763 ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1i))); 1764 ptr_x[2 * i + 3] = 1765 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 3], (*w1r)), 1766 ixheaacd_mult32_sat(ptr_x[2 * i + 2], (*w1i))); 1767 ptr_x[2 * i + 2] = tmp; 1768 1769 w1r++; 1770 w1i++; 1771 1772 tmp = ixheaacd_add32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1r)), 1773 ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1i))); 1774 ptr_x[2 * i + 5] = 1775 ixheaacd_sub32_sat(ixheaacd_mult32_sat(ptr_x[2 * i + 5], (*w1r)), 1776 ixheaacd_mult32_sat(ptr_x[2 * i + 4], (*w1i))); 1777 ptr_x[2 * i + 4] = tmp; 1778 1779 w1r += 3 * (128 / mpass - 1) + 1; 1780 w1i += 3 * (128 / mpass - 1) + 1; 1781 } 1782 } 1783 } 1784 1785 for (i = 0; i < mpass; i++) { 1786 ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode); 1787 1788 ptr_x = ptr_x + 6; 1789 ptr_y = ptr_y + 6; 1790 } 1791 1792 for (i = 0; i < mpass; i++) { 1793 xr[i] = y[6 * i]; 1794 xi[i] = y[6 * i + 1]; 1795 } 1796 1797 for (i = 0; i < mpass; i++) { 1798 xr[mpass + i] = y[6 * i + 2]; 1799 xi[mpass + i] = y[6 * i + 3]; 1800 } 1801 1802 for (i = 0; i < mpass; i++) { 1803 xr[2 * mpass + i] = y[6 * i + 4]; 1804 xi[2 * mpass + i] = y[6 * i + 5]; 1805 } 1806 return; 1807 } 1808 1809 VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, 1810 WORD32 fft_mode, WORD32 *preshift) { 1811 if (nlength & (nlength - 1)) { 1812 if ((nlength != 24) && (nlength != 48) && (nlength != 96) && 1813 (nlength != 192) && (nlength != 384)) { 1814 printf("%d point FFT not supported", nlength); 1815 exit(0); 1816 } 1817 ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift); 1818 } else 1819 (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift); 1820 1821 return; 1822 } 1823