@=============================================================================
@ ixheaacd_complex_fft_p2_asm
@
@ Fixed-point complex FFT, done as radix-4 stages plus one final radix-2
@ stage when log2(npoints) is odd.  npoints is treated as a power of two:
@ its log2 is derived from CLZ.
@
@ Syntax : GNU as, ARM state (register-offset LDRD with write-back and
@          negative register-offset LDR are used, so this cannot be Thumb).
@          Needs MOVW/MOVT, CLZ and LDRD/STRD.
@
@ Arguments, as the code uses them:
@   r0 = twiddle table, read as pairs of 32-bit words (called wNh, wNl
@        below) with 8 bytes per entry
@   r1 = npoints
@   r2 = input buffer, interleaved (re, im) 32-bit words; read only by the
@        first stage, in digit-reversed order
@   r3 = output buffer ("ptr_y"); written by the first stage, then every
@        later stage works in place on it
@ NOTE(review): the C prototype is not visible in this file - presumably
@   void f(const WORD32 *twiddles, WORD32 npoints, WORD32 *ptr_x, WORD32 *ptr_y)
@ confirm against the caller.
@
@ Returns : nothing is deliberately returned (r0 holds a leftover value).
@ Clobbers: r0-r3, flags.  r4-r12 are restored on exit.
@
@ Stack frame after the prologue (all accesses are single-word LDR/STR and
@ no calls are made, so the 4-byte-aligned sp is sufficient):
@   [sp,#0x00] w3l      [sp,#0x04] w3h
@   [sp,#0x08] w2l      [sp,#0x0c] w2h
@   [sp,#0x10] w1l      [sp,#0x14] w1h
@   [sp,#0x18] scratch: radix-4 stage count, then the ~1/3 twiddle-index
@              threshold, then the running twiddle pointer of the radix-2
@              stage
@   [sp,#0x24] j, the current twiddle index
@   [sp,#0x2c] twiddle table base
@   [sp,#0x30] 1 if log2(npoints) is odd (a radix-2 stage is needed)
@   [sp,#0x34] number of butterflies per inner loop
@   [sp,#0x38] nodespacing (twiddle index step)
@   [sp,#0x3c] radix-4 stages still to run
@   [sp,#0x40] del * nodespacing
@   [sp,#0x44] saved r0 (twiddles)   [sp,#0x48] saved r1 (npoints)
@   [sp,#0x4c] saved r2              [sp,#0x50] saved r3 (ptr_y)
@=============================================================================

@ dst = bits [62:31] of the signed 64-bit product src * coef.
@ tmp is clobbered; dst may be the same register as src.
@ In ARM state none of the three instructions changes the flags, which the
@ loops below rely on (SUBS ... multiplies ... BNE).
.macro  MULSHIFT_Q31 dst, src, coef, tmp
    SMULL   \tmp, \dst, \src, \coef
    LSR     \tmp, \tmp, #31
    ORR     \dst, \tmp, \dst, LSL#1
.endm

.text
.p2align 2
.arm
.global ixheaacd_complex_fft_p2_asm
.type   ixheaacd_complex_fft_p2_asm, %function

ixheaacd_complex_fft_p2_asm:
    STMFD   sp!, {r0-r12, lr}
    SUB     sp, sp, #0x44               @ locals, see frame layout above
    LDR     r0, [sp, #0x48]             @ npoints
    EOR     r0, r0, r0, ASR #31
    CLZ     r0, r0                      @ r0 = leading zeros of npoints
    SUB     r12, r0, #16                @ dig_rev_shift = clz - 16
    SUB     r0, r0, #1
    RSB     r0, r0, #0x1e               @ r0 = 31 - clz = log2(npoints)
    AND     r1, r0, #1
    STR     r1, [sp, #0x30]             @ odd log2: radix-2 stage needed
    MOV     r1, r0, ASR #1
    LDR     r0, [sp, #0x48]             @ npoints
    STR     r1, [sp, #0x18]             @ number of radix-4 stages
    MOV     lr, r0, LSL #1              @ lr = npoints*2 bytes = quarter of the buffer
    MOV     r0, #0                      @ r0 = loop index, step 4, runs to npoints

@-----------------------------------------------------------------------------
@ First radix-4 stage: digit-reversed read from r2, no twiddles,
@ sequential write to r3.
@-----------------------------------------------------------------------------
FIRST_STAGE_R4:
    @ Reverse the order of the 2-bit digits of r0 inside each 16-bit half.
    MOVW    r4, #0x3333
    MOVT    r4, #0x3333
    MOVW    r5, #0x0F0F
    MOVT    r5, #0x0F0F
    AND     r6, r4, r0
    AND     r7, r4, r0, LSR #2
    ORR     r4, r7, r6, LSL #2          @ swap 2-bit digits inside nibbles
    AND     r6, r5, r4
    AND     r7, r5, r4, LSR #4
    ORR     r4, r7, r6, LSL #4          @ swap nibbles inside bytes
    BIC     r6, r4, #0x0000FF00
    BIC     r7, r4, #0x00FF0000
    MOV     r7, r7, LSR #8
    ORR     r4, r7, r6, LSL #8          @ swap bytes inside halfwords
    LDR     r5, [sp, #0x30]
    MOV     r10, r4, LSR r12            @ scale down to the range of the index
    CMP     r5, #0
    ADDNE   r10, r10, #1                @ odd log2: round the word index
    BICNE   r10, r10, #1                @ up to an even value

    ADD     r1, r2, r10, LSL #2         @ r1 = input + index * 4
    LDRD    r4, [r1]                    @ r4=x0r,  r5=x0i
    ADD     r1, r1, lr
    LDRD    r8, [r1]                    @ r8=x1r,  r9=x1i
    ADD     r1, r1, lr
    LDRD    r6, [r1]                    @ r6=x2r,  r7=x2i
    ADD     r1, r1, lr
    LDRD    r10, [r1]                   @ r10=x3r, r11=x3i
    ADD     r0, r0, #4
    CMP     r0, lr, ASR #1              @ index vs npoints; flags live until BLT

    ADD     r4, r4, r6                  @ r4  = x0r + x2r
    ADD     r5, r5, r7                  @ r5  = x0i + x2i
    SUB     r6, r4, r6, lsl#1           @ r6  = x0r - x2r
    SUB     r7, r5, r7, lsl#1           @ r7  = x0i - x2i
    ADD     r8, r8, r10                 @ r8  = x1r + x3r
    ADD     r9, r9, r11                 @ r9  = x1i + x3i
    SUB     r1, r8, r10, lsl#1          @ r1  = x1r - x3r
    SUB     r11, r9, r11, lsl#1         @ r11 = x1i - x3i

    ADD     r4, r4, r8                  @ r4  = (x0r+x2r) + (x1r+x3r)
    ADD     r5, r5, r9                  @ r5  = (x0i+x2i) + (x1i+x3i)
    SUB     r8, r4, r8, lsl#1           @ r8  = (x0r+x2r) - (x1r+x3r)
    SUB     r9, r5, r9, lsl#1           @ r9  = (x0i+x2i) - (x1i+x3i)
    ADD     r6, r6, r11                 @ r6  = (x0r-x2r) + (x1i-x3i)
    SUB     r7, r7, r1                  @ r7  = (x0i-x2i) - (x1r-x3r)
    SUB     r10, r6, r11, lsl#1         @ r10 = (x0r-x2r) - (x1i-x3i)
    ADD     r11, r7, r1, lsl#1          @ r11 = (x0i-x2i) + (x1r-x3r)

    STMIA   r3!, {r4-r11}               @ four complex results, 32 bytes
    BLT     FIRST_STAGE_R4

    LDR     r1, [sp, #0x18]             @ radix-4 stage count
    LDR     r0, [sp, #0x48]
    MOV     r12, #0x40                  @ nodespacing = 64
    STR     r12, [sp, #0x38]
    LDR     r12, [sp, #0x48]
    SUB     r3, r3, r0, LSL #3          @ rewind r3 by npoints*8 to the buffer start
    SUBS    r1, r1, #1                  @ one stage done; flags live until BLE
    STR     r3, [sp, #0x50]
    MOV     r4, r12, ASR #4             @ npoints / 16
    MOV     r0, #4                      @ del = 4
    STR     r4, [sp, #0x34]             @ butterflies per inner loop
    STR     r1, [sp, #0x3c]             @ remaining radix-4 stages
    BLE     RADIX2

@-----------------------------------------------------------------------------
@ Remaining radix-4 stages.  r0 = del on entry to each stage.
@-----------------------------------------------------------------------------
OUTER_LOOP:
    LDR     r1, [sp, #0x44]
    LDR     r12, [sp, #0x50]            @ data = ptr_y
    STR     r1, [sp, #0x2c]             @ twiddle base
    LDR     r1, [sp, #0x34]             @ loop count

    MOV     r0, r0, LSL #3              @ r0 = del * 8 = byte stride between legs

@ Butterflies of the first element of every group: no twiddle multiply.
LOOP_TRIVIAL_TWIDDLE:
    LDRD    r4, [r12]                   @ r4=x0r,  r5=x0i
    ADD     r12, r12, r0
    LDRD    r6, [r12]                   @ r6=x1r,  r7=x1i
    ADD     r12, r12, r0
    LDRD    r8, [r12]                   @ r8=x2r,  r9=x2i
    ADD     r12, r12, r0
    LDRD    r10, [r12]                  @ r10=x3r, r11=x3i

    ADD     r4, r4, r8                  @ r4  = x0r + x2r
    ADD     r5, r5, r9                  @ r5  = x0i + x2i
    SUB     r8, r4, r8, lsl #1          @ r8  = x0r - x2r
    SUB     r9, r5, r9, lsl #1          @ r9  = x0i - x2i
    ADD     r6, r6, r10                 @ r6  = x1r + x3r
    ADD     r7, r7, r11                 @ r7  = x1i + x3i
    SUB     r2, r6, r10, lsl #1         @ r2  = x1r - x3r
    SUB     r11, r7, r11, lsl #1        @ r11 = x1i - x3i

    ADD     r4, r4, r6                  @ r4  = (x0r+x2r) + (x1r+x3r)
    ADD     r5, r5, r7                  @ r5  = (x0i+x2i) + (x1i+x3i)
    SUB     r6, r4, r6, lsl #1          @ r6  = (x0r+x2r) - (x1r+x3r)
    SUB     r7, r5, r7, lsl #1          @ r7  = (x0i+x2i) - (x1i+x3i)
    ADD     r8, r8, r11                 @ r8  = (x0r-x2r) + (x1i-x3i)
    SUB     r9, r9, r2                  @ r9  = (x0i-x2i) - (x1r-x3r)
    SUB     r10, r8, r11, lsl#1         @ r10 = (x0r-x2r) - (x1i-x3i)
    ADD     r11, r9, r2, lsl#1          @ r11 = (x0i-x2i) + (x1r-x3r)

    STRD    r10, [r12]                  @ leg 3
    SUB     r12, r12, r0
    STRD    r6, [r12]                   @ leg 2
    SUB     r12, r12, r0
    STRD    r8, [r12]                   @ leg 1
    SUB     r12, r12, r0
    STRD    r4, [r12]                   @ leg 0
    ADD     r12, r12, r0, lsl #2        @ next group

    SUBS    r1, r1, #1
    BNE     LOOP_TRIVIAL_TWIDDLE

    MOV     r0, r0, ASR #3              @ back to del
    LDR     r4, [sp, #0x38]             @ j = nodespacing
    LDR     r3, [sp, #0x50]
    MUL     r1, r0, r4                  @ r1 = del * nodespacing
    ADD     r12, r3, #8                 @ data = second complex element
    STR     r1, [sp, #0x40]
    @ r3 = r1*(1/4 + 1/8 - 1/16 + 1/32 - 1/64 + 1/128 - 1/256), about r1/3
    MOV     r3, r1, ASR #2
    ADD     r3, r3, r1, ASR #3
    SUB     r3, r3, r1, ASR #4
    ADD     r3, r3, r1, ASR #5
    SUB     r3, r3, r1, ASR #6
    ADD     r3, r3, r1, ASR #7
    SUB     r3, r3, r1, ASR #8
    STR     r3, [sp, #0x18]

@ The range of j is covered by four loops.  They differ in how many of the
@ three twiddle fetches are pulled back by 2048 bytes in the table and in how
@ the four products of each twiddle multiply are combined.
@ NOTE(review): presumably this folds larger twiddle indices back into a table
@ that stores only part of the circle - confirm against the table layout.

@ Loop 1: j up to [sp,#0x18]; no fetch is pulled back.
SECOND_LOOP:
    LDR     r3, [sp, #0x2c]
    LDR     r14, [sp, #0x34]            @ butterflies in this pass
    MOV     r0, r0, LSL #3              @ r0 = del * 8
    LDR     r1, [r3, r4, LSL #3]!       @ w1h, at base + j*8
    LDR     r2, [r3, #0x04]             @ w1l
    LDR     r5, [r3, r4, LSL #3]!       @ w2h, at base + 2*j*8
    LDR     r6, [r3, #0x04]             @ w2l
    LDR     r7, [r3, r4, LSL #3]!       @ w3h, at base + 3*j*8
    LDR     r8, [r3, #0x04]             @ w3l

    STR     r4, [sp, #0x24]             @ save j
    STR     r1, [sp, #0x14]
    STR     r2, [sp, #0x10]
    STR     r5, [sp, #0x0c]
    STR     r6, [sp, #0x08]
    STR     r7, [sp, #0x04]
    STR     r8, [sp]

RADIX4_BFLY:
    LDRD    r6, [r12, r0]!              @ r6=x1r,  r7=x1i
    LDRD    r8, [r12, r0]!              @ r8=x2r,  r9=x2i
    LDRD    r10, [r12, r0]              @ r10=x3r, r11=x3i
    SUBS    r14, r14, #1                @ flags live until BNE

    LDR     r1, [sp, #0x14]             @ w1h
    LDR     r2, [sp, #0x10]             @ w1l
    MULSHIFT_Q31 r4, r6, r2, r3         @ r4 = x1r*w1l
    MULSHIFT_Q31 r6, r6, r1, r3         @ r6 = x1r*w1h
    MULSHIFT_Q31 r5, r7, r1, r3         @ r5 = x1i*w1h
    MULSHIFT_Q31 r7, r7, r2, r3         @ r7 = x1i*w1l
    ADD     r7, r7, r6                  @ x1i = x1i*w1l + x1r*w1h
    SUB     r6, r4, r5                  @ x1r = x1r*w1l - x1i*w1h

    LDR     r1, [sp, #0x0c]             @ w2h
    LDR     r2, [sp, #0x08]             @ w2l
    MULSHIFT_Q31 r4, r8, r2, r3         @ r4 = x2r*w2l
    MULSHIFT_Q31 r8, r8, r1, r3         @ r8 = x2r*w2h
    MULSHIFT_Q31 r5, r9, r1, r3         @ r5 = x2i*w2h
    MULSHIFT_Q31 r9, r9, r2, r3         @ r9 = x2i*w2l
    ADD     r9, r9, r8                  @ x2i = x2i*w2l + x2r*w2h
    SUB     r8, r4, r5                  @ x2r = x2r*w2l - x2i*w2h

    LDR     r1, [sp, #0x04]             @ w3h
    LDR     r2, [sp]                    @ w3l
    MULSHIFT_Q31 r4, r10, r2, r3        @ r4  = x3r*w3l
    MULSHIFT_Q31 r10, r10, r1, r3       @ r10 = x3r*w3h
    MULSHIFT_Q31 r5, r11, r1, r3        @ r5  = x3i*w3h
    MULSHIFT_Q31 r11, r11, r2, r3       @ r11 = x3i*w3l
    ADD     r11, r11, r10               @ x3i = x3i*w3l + x3r*w3h
    SUB     r10, r4, r5                 @ x3r = x3r*w3l - x3i*w3h

    LDR     r4, [r12, -r0, lsl #1]!     @ step back two legs: r4 = x0r
    LDR     r5, [r12, #0x04]            @ r5 = x0i

    ADD     r4, r8, r4                  @ r4  = x0r + x2r
    ADD     r5, r9, r5                  @ r5  = x0i + x2i
    SUB     r8, r4, r8, lsl#1           @ r8  = x0r - x2r
    SUB     r9, r5, r9, lsl#1           @ r9  = x0i - x2i
    ADD     r6, r6, r10                 @ r6  = x1r + x3r
    ADD     r7, r7, r11                 @ r7  = x1i + x3i
    SUB     r10, r6, r10, lsl#1         @ r10 = x1r - x3r
    SUB     r11, r7, r11, lsl#1         @ r11 = x1i - x3i

    ADD     r4, r4, r6                  @ r4  = (x0r+x2r) + (x1r+x3r)
    ADD     r5, r5, r7                  @ r5  = (x0i+x2i) + (x1i+x3i)
    SUB     r6, r4, r6, lsl#1           @ r6  = (x0r+x2r) - (x1r+x3r)
    SUB     r7, r5, r7, lsl#1           @ r7  = (x0i+x2i) - (x1i+x3i)
    STRD    r4, [r12]                   @ leg 0
    ADD     r12, r12, r0

    ADD     r8, r8, r11                 @ r8  = (x0r-x2r) + (x1i-x3i)
    SUB     r9, r9, r10                 @ r9  = (x0i-x2i) - (x1r-x3r)
    SUB     r4, r8, r11, lsl#1          @ r4  = (x0r-x2r) - (x1i-x3i)
    ADD     r5, r9, r10, lsl#1          @ r5  = (x0i-x2i) + (x1r-x3r)

    STRD    r8, [r12]                   @ leg 1
    ADD     r12, r12, r0
    STRD    r6, [r12]                   @ leg 2
    ADD     r12, r12, r0
    STRD    r4, [r12]                   @ leg 3 (held in r4, r5)
    ADD     r12, r12, r0                @ next group

    BNE     RADIX4_BFLY
    MOV     r0, r0, ASR #3              @ back to del

    LDR     r1, [sp, #0x48]
    LDR     r4, [sp, #0x24]             @ j
    SUB     r1, r12, r1, LSL #3         @ rewind by npoints*8
    LDR     r6, [sp, #0x38]
    ADD     r12, r1, #8                 @ data = next element of the first group
    LDR     r7, [sp, #0x18]
    ADD     r4, r4, r6                  @ j += nodespacing
    CMP     r4, r7
    BLE     SECOND_LOOP

@ Loop 2: j up to (del*nodespacing)/2; the w3 fetch is pulled back once.
SECOND_LOOP_2:
    LDR     r3, [sp, #0x2c]
    LDR     r14, [sp, #0x34]
    MOV     r0, r0, LSL #3              @ r0 = del * 8

    LDR     r1, [r3, r4, LSL #3]!       @ w1h, at base + j*8
    LDR     r2, [r3, #0x04]             @ w1l
    LDR     r5, [r3, r4, LSL #3]!       @ w2h, at base + 2*j*8
    LDR     r6, [r3, #0x04]             @ w2l
    SUB     r3, r3, #2048               @ 512 * 4 bytes back
    LDR     r7, [r3, r4, LSL #3]!       @ w3h, at base + 3*j*8 - 2048
    LDR     r8, [r3, #0x04]             @ w3l

    STR     r4, [sp, #0x24]             @ save j

    STR     r1, [sp, #0x14]
    STR     r2, [sp, #0x10]
    STR     r5, [sp, #0x0c]
    STR     r6, [sp, #0x08]
    STR     r7, [sp, #0x04]
    STR     r8, [sp]

RADIX4_BFLY_2:
    LDRD    r6, [r12, r0]!              @ r6=x1r,  r7=x1i
    LDRD    r8, [r12, r0]!              @ r8=x2r,  r9=x2i
    LDRD    r10, [r12, r0]              @ r10=x3r, r11=x3i
    SUBS    r14, r14, #1                @ flags live until BNE
    LDR     r1, [sp, #0x14]             @ w1h
    LDR     r2, [sp, #0x10]             @ w1l

    MULSHIFT_Q31 r4, r6, r2, r3         @ r4 = x1r*w1l
    MULSHIFT_Q31 r6, r6, r1, r3         @ r6 = x1r*w1h
    MULSHIFT_Q31 r5, r7, r1, r3         @ r5 = x1i*w1h
    MULSHIFT_Q31 r7, r7, r2, r3         @ r7 = x1i*w1l
    ADD     r7, r7, r6                  @ x1i = x1i*w1l + x1r*w1h
    SUB     r6, r4, r5                  @ x1r = x1r*w1l - x1i*w1h

    LDR     r1, [sp, #0x0c]             @ w2h
    LDR     r2, [sp, #0x08]             @ w2l
    MULSHIFT_Q31 r4, r8, r2, r3         @ r4 = x2r*w2l
    MULSHIFT_Q31 r8, r8, r1, r3         @ r8 = x2r*w2h
    MULSHIFT_Q31 r5, r9, r1, r3         @ r5 = x2i*w2h
    MULSHIFT_Q31 r9, r9, r2, r3         @ r9 = x2i*w2l
    ADD     r9, r9, r8                  @ x2i = x2i*w2l + x2r*w2h
    SUB     r8, r4, r5                  @ x2r = x2r*w2l - x2i*w2h

    LDR     r1, [sp, #0x04]             @ w3h
    LDR     r2, [sp]                    @ w3l
    MULSHIFT_Q31 r4, r10, r2, r3        @ r4  = x3r*w3l
    MULSHIFT_Q31 r10, r10, r1, r3       @ r10 = x3r*w3h
    MULSHIFT_Q31 r5, r11, r1, r3        @ r5  = x3i*w3h
    MULSHIFT_Q31 r11, r11, r2, r3       @ r11 = x3i*w3l
    ADD     r10, r11, r10               @ r10 = x3i*w3l + x3r*w3h  (roles swapped
    SUB     r11, r5, r4                 @ r11 = x3i*w3h - x3r*w3l   vs. loop 1)

    LDR     r4, [r12, -r0, lsl #1]!     @ step back two legs: r4 = x0r
    LDR     r5, [r12, #0x04]            @ r5 = x0i

    ADD     r4, r8, r4                  @ r4  = x0r + x2r
    ADD     r5, r9, r5                  @ r5  = x0i + x2i
    SUB     r8, r4, r8, lsl#1           @ r8  = x0r - x2r
    SUB     r9, r5, r9, lsl#1           @ r9  = x0i - x2i
    ADD     r6, r6, r10                 @ r6  = x1r + r10
    ADD     r7, r7, r11                 @ r7  = x1i + r11
    SUB     r10, r6, r10, lsl#1         @ r10 = x1r - r10
    SUB     r11, r7, r11, lsl#1         @ r11 = x1i - r11

    ADD     r4, r4, r6                  @ leg 0, real
    ADD     r5, r5, r7                  @ leg 0, imag
    SUB     r6, r4, r6, lsl#1           @ leg 2, real
    SUB     r7, r5, r7, lsl#1           @ leg 2, imag
    STRD    r4, [r12]                   @ leg 0
    ADD     r12, r12, r0

    ADD     r8, r8, r11                 @ leg 1, real
    SUB     r9, r9, r10                 @ leg 1, imag
    SUB     r4, r8, r11, lsl#1          @ leg 3, real
    ADD     r5, r9, r10, lsl#1          @ leg 3, imag

    STRD    r8, [r12]                   @ leg 1
    ADD     r12, r12, r0
    STRD    r6, [r12]                   @ leg 2
    ADD     r12, r12, r0
    STRD    r4, [r12]                   @ leg 3 (held in r4, r5)
    ADD     r12, r12, r0                @ next group

    BNE     RADIX4_BFLY_2
    MOV     r0, r0, ASR #3              @ back to del

    LDR     r1, [sp, #0x48]
    LDR     r4, [sp, #0x24]             @ j
    SUB     r1, r12, r1, LSL #3         @ rewind by npoints*8
    LDR     r6, [sp, #0x38]
    ADD     r12, r1, #8                 @ data = next element of the first group
    LDR     r7, [sp, #0x40]
    ADD     r4, r4, r6                  @ j += nodespacing
    CMP     r4, r7, ASR #1
    BLE     SECOND_LOOP_2
    LDR     r7, [sp, #0x18]
    CMP     r4, r7, LSL #1
    BGT     SECOND_LOOP_4               @ already past the range of loop 3

@ Loop 3: j up to 2*[sp,#0x18]; the w2 and w3 fetches are pulled back once.
SECOND_LOOP_3:
    LDR     r3, [sp, #0x2c]
    LDR     r14, [sp, #0x34]
    MOV     r0, r0, LSL #3              @ r0 = del * 8

    LDR     r1, [r3, r4, LSL #3]!       @ w1h, at base + j*8
    LDR     r2, [r3, #0x04]             @ w1l
    SUB     r3, r3, #2048               @ 512 * 4 bytes back
    LDR     r5, [r3, r4, LSL #3]!       @ w2h, at base + 2*j*8 - 2048
    LDR     r6, [r3, #0x04]             @ w2l
    LDR     r7, [r3, r4, LSL #3]!       @ w3h, at base + 3*j*8 - 2048
    LDR     r8, [r3, #0x04]             @ w3l

    STR     r4, [sp, #0x24]             @ save j
    STR     r1, [sp, #0x14]
    STR     r2, [sp, #0x10]
    STR     r5, [sp, #0x0c]
    STR     r6, [sp, #0x08]
    STR     r7, [sp, #0x04]
    STR     r8, [sp]

RADIX4_BFLY_3:
    LDRD    r6, [r12, r0]!              @ r6=x1r,  r7=x1i
    LDRD    r8, [r12, r0]!              @ r8=x2r,  r9=x2i
    LDRD    r10, [r12, r0]              @ r10=x3r, r11=x3i
    SUBS    r14, r14, #1                @ flags live until BNE

    LDR     r1, [sp, #0x14]             @ w1h
    LDR     r2, [sp, #0x10]             @ w1l
    MULSHIFT_Q31 r4, r6, r2, r3         @ r4 = x1r*w1l
    MULSHIFT_Q31 r6, r6, r1, r3         @ r6 = x1r*w1h
    MULSHIFT_Q31 r5, r7, r1, r3         @ r5 = x1i*w1h
    MULSHIFT_Q31 r7, r7, r2, r3         @ r7 = x1i*w1l
    ADD     r7, r7, r6                  @ x1i = x1i*w1l + x1r*w1h
    SUB     r6, r4, r5                  @ x1r = x1r*w1l - x1i*w1h

    LDR     r1, [sp, #0x0c]             @ w2h
    LDR     r2, [sp, #0x08]             @ w2l
    MULSHIFT_Q31 r4, r8, r2, r3         @ r4 = x2r*w2l
    MULSHIFT_Q31 r8, r8, r1, r3         @ r8 = x2r*w2h
    MULSHIFT_Q31 r5, r9, r1, r3         @ r5 = x2i*w2h
    MULSHIFT_Q31 r9, r9, r2, r3         @ r9 = x2i*w2l
    ADD     r8, r9, r8                  @ r8 = x2i*w2l + x2r*w2h  (roles swapped
    SUB     r9, r5, r4                  @ r9 = x2i*w2h - x2r*w2l   vs. loop 1)

    LDR     r1, [sp, #0x04]             @ w3h
    LDR     r2, [sp]                    @ w3l
    MULSHIFT_Q31 r4, r10, r2, r3        @ r4  = x3r*w3l
    MULSHIFT_Q31 r10, r10, r1, r3       @ r10 = x3r*w3h
    MULSHIFT_Q31 r5, r11, r1, r3        @ r5  = x3i*w3h
    MULSHIFT_Q31 r11, r11, r2, r3       @ r11 = x3i*w3l
    ADD     r10, r11, r10               @ r10 = x3i*w3l + x3r*w3h
    SUB     r11, r5, r4                 @ r11 = x3i*w3h - x3r*w3l

    LDR     r4, [r12, -r0, lsl #1]!     @ step back two legs: r4 = x0r
    LDR     r5, [r12, #0x04]            @ r5 = x0i

    ADD     r4, r8, r4                  @ r4  = x0r + r8
    ADD     r5, r9, r5                  @ r5  = x0i + r9
    SUB     r8, r4, r8, lsl#1           @ r8  = x0r - r8
    SUB     r9, r5, r9, lsl#1           @ r9  = x0i - r9
    ADD     r6, r6, r10                 @ r6  = x1r + r10
    ADD     r7, r7, r11                 @ r7  = x1i + r11
    SUB     r10, r6, r10, lsl#1         @ r10 = x1r - r10
    SUB     r11, r7, r11, lsl#1         @ r11 = x1i - r11

    ADD     r4, r4, r6                  @ leg 0, real
    ADD     r5, r5, r7                  @ leg 0, imag
    SUB     r6, r4, r6, lsl#1           @ leg 2, real
    SUB     r7, r5, r7, lsl#1           @ leg 2, imag
    STRD    r4, [r12]                   @ leg 0
    ADD     r12, r12, r0

    ADD     r8, r8, r11                 @ leg 1, real
    SUB     r9, r9, r10                 @ leg 1, imag
    SUB     r4, r8, r11, lsl#1          @ leg 3, real
    ADD     r5, r9, r10, lsl#1          @ leg 3, imag

    STRD    r8, [r12]                   @ leg 1
    ADD     r12, r12, r0
    STRD    r6, [r12]                   @ leg 2
    ADD     r12, r12, r0
    STRD    r4, [r12]                   @ leg 3 (held in r4, r5)
    ADD     r12, r12, r0                @ next group

    BNE     RADIX4_BFLY_3
    MOV     r0, r0, ASR #3              @ back to del

    LDR     r1, [sp, #0x48]
    LDR     r4, [sp, #0x24]             @ j
    SUB     r1, r12, r1, LSL #3         @ rewind by npoints*8
    LDR     r6, [sp, #0x38]
    ADD     r12, r1, #8                 @ data = next element of the first group
    LDR     r7, [sp, #0x18]
    ADD     r4, r4, r6                  @ j += nodespacing
    CMP     r4, r7, LSL #1
    BLE     SECOND_LOOP_3

@ Loop 4: j below del*nodespacing; w2 is pulled back once, w3 twice.
SECOND_LOOP_4:
    LDR     r3, [sp, #0x2c]
    LDR     r14, [sp, #0x34]
    MOV     r0, r0, LSL #3              @ r0 = del * 8

    LDR     r1, [r3, r4, LSL #3]!       @ w1h, at base + j*8
    LDR     r2, [r3, #0x04]             @ w1l
    SUB     r3, r3, #2048               @ 512 * 4 bytes back
    LDR     r5, [r3, r4, LSL #3]!       @ w2h, at base + 2*j*8 - 2048
    LDR     r6, [r3, #0x04]             @ w2l
    SUB     r3, r3, #2048               @ 512 * 4 bytes back
    LDR     r7, [r3, r4, LSL #3]!       @ w3h, at base + 3*j*8 - 4096
    LDR     r8, [r3, #0x04]             @ w3l

    STR     r4, [sp, #0x24]             @ save j
    STR     r1, [sp, #0x14]
    STR     r2, [sp, #0x10]
    STR     r5, [sp, #0x0c]
    STR     r6, [sp, #0x08]
    STR     r7, [sp, #0x04]
    STR     r8, [sp]

RADIX4_BFLY_4:
    LDRD    r6, [r12, r0]!              @ r6=x1r,  r7=x1i
    LDRD    r8, [r12, r0]!              @ r8=x2r,  r9=x2i
    LDRD    r10, [r12, r0]              @ r10=x3r, r11=x3i
    SUBS    r14, r14, #1                @ flags live until BNE

    LDR     r1, [sp, #0x14]             @ w1h
    LDR     r2, [sp, #0x10]             @ w1l
    MULSHIFT_Q31 r4, r6, r2, r3         @ r4 = x1r*w1l
    MULSHIFT_Q31 r6, r6, r1, r3         @ r6 = x1r*w1h
    MULSHIFT_Q31 r5, r7, r1, r3         @ r5 = x1i*w1h
    MULSHIFT_Q31 r7, r7, r2, r3         @ r7 = x1i*w1l
    ADD     r7, r7, r6                  @ x1i = x1i*w1l + x1r*w1h
    SUB     r6, r4, r5                  @ x1r = x1r*w1l - x1i*w1h

    LDR     r1, [sp, #0x0c]             @ w2h
    LDR     r2, [sp, #0x08]             @ w2l
    MULSHIFT_Q31 r4, r8, r2, r3         @ r4 = x2r*w2l
    MULSHIFT_Q31 r8, r8, r1, r3         @ r8 = x2r*w2h
    MULSHIFT_Q31 r5, r9, r1, r3         @ r5 = x2i*w2h
    MULSHIFT_Q31 r9, r9, r2, r3         @ r9 = x2i*w2l
    ADD     r8, r9, r8                  @ r8 = x2i*w2l + x2r*w2h
    SUB     r9, r5, r4                  @ r9 = x2i*w2h - x2r*w2l

    LDR     r1, [sp, #0x04]             @ w3h
    LDR     r2, [sp]                    @ w3l
    MULSHIFT_Q31 r4, r10, r2, r3        @ r4  = x3r*w3l
    MULSHIFT_Q31 r10, r10, r1, r3       @ r10 = x3r*w3h
    MULSHIFT_Q31 r5, r11, r1, r3        @ r5  = x3i*w3h
    MULSHIFT_Q31 r11, r11, r2, r3       @ r11 = x3i*w3l
    ADD     r11, r11, r10               @ r11 = x3i*w3l + x3r*w3h
    SUB     r10, r5, r4                 @ r10 = x3i*w3h - x3r*w3l

    LDR     r4, [r12, -r0, lsl #1]!     @ step back two legs: r4 = x0r
    LDR     r5, [r12, #0x04]            @ r5 = x0i

    ADD     r4, r8, r4                  @ r4  = x0r + r8
    ADD     r5, r9, r5                  @ r5  = x0i + r9
    SUB     r8, r4, r8, lsl#1           @ r8  = x0r - r8
    SUB     r9, r5, r9, lsl#1           @ r9  = x0i - r9
    ADD     r6, r6, r10                 @ r6  = x1r + r10
    SUB     r7, r7, r11                 @ r7  = x1i - r11  (sign differs from
    SUB     r10, r6, r10, lsl#1         @ r10 = x1r - r10
    ADD     r11, r7, r11, lsl#1         @ r11 = x1i + r11   loops 1 to 3)

    ADD     r4, r4, r6                  @ leg 0, real
    ADD     r5, r5, r7                  @ leg 0, imag
    SUB     r6, r4, r6, lsl#1           @ leg 2, real
    SUB     r7, r5, r7, lsl#1           @ leg 2, imag
    STRD    r4, [r12]                   @ leg 0
    ADD     r12, r12, r0

    ADD     r8, r8, r11                 @ leg 1, real
    SUB     r9, r9, r10                 @ leg 1, imag
    SUB     r4, r8, r11, lsl#1          @ leg 3, real
    ADD     r5, r9, r10, lsl#1          @ leg 3, imag

    STRD    r8, [r12]                   @ leg 1
    ADD     r12, r12, r0
    STRD    r6, [r12]                   @ leg 2
    ADD     r12, r12, r0
    STRD    r4, [r12]                   @ leg 3 (held in r4, r5)
    ADD     r12, r12, r0                @ next group

    BNE     RADIX4_BFLY_4
    MOV     r0, r0, ASR #3              @ back to del

    LDR     r1, [sp, #0x48]
    LDR     r4, [sp, #0x24]             @ j
    SUB     r1, r12, r1, LSL #3         @ rewind by npoints*8
    LDR     r6, [sp, #0x38]
    ADD     r12, r1, #8                 @ data = next element of the first group
    LDR     r7, [sp, #0x40]
    ADD     r4, r4, r6                  @ j += nodespacing
    CMP     r4, r7
    BLT     SECOND_LOOP_4

    @ Set up the next radix-4 stage.
    LDR     r1, [sp, #0x38]
    MOV     r0, r0, LSL #2              @ del *= 4
    MOV     r1, r1, ASR #2
    STR     r1, [sp, #0x38]             @ nodespacing /= 4
    LDR     r1, [sp, #0x34]
    MOV     r1, r1, ASR #2
    STR     r1, [sp, #0x34]             @ butterflies per inner loop /= 4
    LDR     r1, [sp, #0x3c]
    SUBS    r1, r1, #1
    STR     r1, [sp, #0x3c]
    BGT     OUTER_LOOP

@-----------------------------------------------------------------------------
@ Final radix-2 stage, only when log2(npoints) is odd.  r0 = del.
@ Both loops halve their results (ASR #1).
@-----------------------------------------------------------------------------
RADIX2:
    LDR     r1, [sp, #0x30]
    CMP     r1, #0
    BEQ     EXIT
    LDR     r12, [sp, #0x38]            @ nodespacing
    LDR     r1, [sp, #0x44]             @ twiddle base
    CMP     r12, #0
    MOVEQ   r4, #1                      @ twiddle step = 1 entry if nodespacing is 0,
    MOVNE   r4, r12, LSL #1             @ else 2 * nodespacing entries
    MOVS    r3, r0
    BEQ     EXIT                        @ del == 0: nothing to do

    MOV     r3, r3, ASR #1              @ first loop runs del/2 times
    LDR     r5, [sp, #0x50]             @ data = ptr_y
    MOV     r0, r0, LSL #3              @ r0 = del * 8 = byte offset of the second leg
    STR     r1, [sp, #0x18]             @ running twiddle pointer

RADIX2_BFLY:
    LDR     r1, [sp, #0x18]
    LDRD    r6, [r5]                    @ r6=x0r, r7=x0i
    ADD     r5, r5, r0
    LDRD    r8, [r5]                    @ r8=x1r, r9=x1i

    LDR     r2, [r1]                    @ first twiddle word (w1h)
    SUBS    r3, r3, #1                  @ flags live until BNE

    MULSHIFT_Q31 r11, r8, r2, r1        @ r11 = x1r*w1h
    MULSHIFT_Q31 r10, r9, r2, r1        @ r10 = x1i*w1h

    LDR     r1, [sp, #0x18]
    LDR     r2, [r1, #0x04]             @ second twiddle word (w1l)
    ADD     r1, r1, r4, LSL #3          @ advance by the twiddle step
    STR     r1, [sp, #0x18]

    MULSHIFT_Q31 r8, r8, r2, r1         @ r8 = x1r*w1l
    MULSHIFT_Q31 r9, r9, r2, r1         @ r9 = x1i*w1l

    SUB     r8, r8, r10                 @ x1r = x1r*w1l - x1i*w1h
    ADD     r9, r9, r11                 @ x1i = x1i*w1l + x1r*w1h

    ADD     r10, r8, r6
    ASR     r10, r10, #1                @ r10 = (x0r + x1r) / 2
    ADD     r11, r9, r7
    ASR     r11, r11, #1                @ r11 = (x0i + x1i) / 2
    SUB     r8, r6, r8
    ASR     r8, r8, #1                  @ r8  = (x0r - x1r) / 2
    SUB     r9, r7, r9
    ASR     r9, r9, #1                  @ r9  = (x0i - x1i) / 2

    STRD    r8, [r5]                    @ second leg
    SUB     r5, r5, r0
    STRD    r10, [r5], #8               @ first leg, then step to the next element

    BNE     RADIX2_BFLY

    @ Second half: the twiddle pointer restarts at the table base and the
    @ products are combined with swapped roles.
    LDR     r1, [sp, #0x44]
    MOV     r3, r0, ASR #4              @ del/2 iterations again
    STR     r1, [sp, #0x18]

RADIX2_BFLY_2:
    LDR     r1, [sp, #0x18]
    LDRD    r6, [r5]                    @ r6=x0r, r7=x0i
    ADD     r5, r5, r0
    LDRD    r8, [r5]                    @ r8=x1r, r9=x1i

    LDR     r2, [r1]                    @ first twiddle word (w1h)
    SUBS    r3, r3, #1                  @ flags live until BNE

    MULSHIFT_Q31 r11, r8, r2, r1        @ r11 = x1r*w1h
    MULSHIFT_Q31 r10, r9, r2, r1        @ r10 = x1i*w1h

    LDR     r1, [sp, #0x18]
    LDR     r2, [r1, #0x04]             @ second twiddle word (w1l)
    ADD     r1, r1, r4, LSL #3          @ advance by the twiddle step
    STR     r1, [sp, #0x18]

    MULSHIFT_Q31 r8, r8, r2, r1         @ r8 = x1r*w1l
    MULSHIFT_Q31 r9, r9, r2, r1         @ r9 = x1i*w1l

    ADD     r11, r11, r9                @ r11 = x1r*w1h + x1i*w1l
    SUB     r9, r10, r8                 @ r9  = x1i*w1h - x1r*w1l
    MOV     r8, r11                     @ r8  = rotated real part

    ADD     r10, r8, r6
    ASR     r10, r10, #1                @ r10 = (x0r + r8) / 2
    ADD     r11, r9, r7
    ASR     r11, r11, #1                @ r11 = (x0i + r9) / 2
    SUB     r8, r6, r8
    ASR     r8, r8, #1                  @ r8  = (x0r - r8) / 2
    SUB     r9, r7, r9
    ASR     r9, r9, #1                  @ r9  = (x0i - r9) / 2

    STRD    r8, [r5]                    @ second leg
    SUB     r5, r5, r0
    STRD    r10, [r5], #8               @ first leg, then step to the next element

    BNE     RADIX2_BFLY_2

EXIT:
    ADD     sp, sp, #0x54               @ drop 0x44 of locals + saved r0-r3
    LDMFD   sp!, {r4-r12, pc}

.size   ixheaacd_complex_fft_p2_asm, . - ixheaacd_complex_fft_p2_asm