.text
.p2align 2
.global ixheaacd_complex_fft_p2_asm

ixheaacd_complex_fft_p2_asm:
    STMFD sp!, {r0-r12, lr}
    SUB sp, sp, #0x44
    LDR r0, [sp, #0x48]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16            @dig_rev_shift = norm32(npoints) + 1 - 16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x30]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x48]         @npoints@
    STR r1, [sp, #0x18]
    MOV lr, r0, LSL #1          @(npoints >> 1) * 4@
    MOV r0, #0

FIRST_STAGE_R4:
    MOVW r4, #0x3333
    MOVT r4, #0x3333
    MOVW r5, #0x0F0F
    MOVT r5, #0x0F0F
    AND r6, r4, r0
    AND r7, r4, r0, LSR #2
    ORR r4, r7, r6, LSL #2
    AND r6, r5, r4
    AND r7, r5, r4, LSR #4
    ORR r4, r7, r6, LSL #4
    BIC r6, r4, #0x0000FF00
    BIC r7, r4, #0x00FF0000
    MOV r7, r7, LSR #8
    ORR r4, r7, r6, LSL #8
    LDR r5, [sp, #0x30]
    MOV r10, r4, LSR r12
    CMP r5, #0
    ADDNE r10, r10, #1
    BICNE r10, r10, #1

    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1]               @r4=x0r, r5=x0i@
    ADD r1, r1, lr
    LDRD r8, [r1]               @r8=x1r, r9=x1i@
    ADD r1, r1, lr
    LDRD r6, [r1]               @r6=x2r, r7=x2i@
    ADD r1, r1, lr
    LDRD r10, [r1]              @r10=x3r, r11=x3i@
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6              @x0r = x0r + x2r@
    ADD r5, r5, r7              @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10             @x1r = x1r + x3r@
    ADD r9, r9, r11             @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8              @x0r = x0r + x1r@
    ADD r5, r5, r9              @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1       @x1i = x0i - (x1i << 1)@
    ADD r6, r6, r11             @x2r = x2r + x3i@
    SUB r7, r7, r1              @x2i = x2i - x3r@
    SUB r10, r6, r11, lsl#1     @x3i = x2r - (x3i << 1)@
    ADD r11, r7, r1, lsl#1      @x3r = x2i + (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4
    LDR r1, [sp, #0x18]
    LDR r0, [sp, #0x48]
    MOV r12, #0x40              @nodespacing = 64@
    STR r12, [sp, #0x38]
    LDR r12, [sp, #0x48]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x50]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x34]
    STR r1, [sp, #0x3c]
    BLE RADIX2
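@ The first stage above reverses the loop index two bits at a time (radix-4
@ digit reversal via the 0x33333333 / 0x0F0F0F0F masks and the byte swap),
@ fetches the four butterfly inputs from the input buffer (r2) at a spacing of
@ npoints/4 complex words, and writes the four radix-4 outputs contiguously to
@ the work buffer (r3) with STMIA.
@
@ Each OUTER_LOOP iteration below is one further radix-4 stage: r0 holds the
@ butterfly spacing "del" (starts at 4, grows by a factor of 4 per stage),
@ [sp, #0x38] holds the twiddle stride "nodespacing" (starts at 64, shrinks by
@ a factor of 4 per stage) and [sp, #0x34] the butterfly count per twiddle
@ index.  LOOP_TRIVIAL_TWIDDLE handles the j == 0 butterflies of each stage,
@ whose twiddle factors are 1, so they need no multiplications.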
OUTER_LOOP:
    LDR r1, [sp, #0x44]
    LDR r12, [sp, #0x50]        @WORD32 *data = ptr_y@
    STR r1, [sp, #0x2c]
    LDR r1, [sp, #0x34]

    MOV r0, r0, LSL #3          @(del<<1) * 4@
LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0
    LDRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    LDRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    LDRD r10, [r12]             @r10=x3r, r11=x3i@

    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    @MOV r6,r6,ASR #1
    @MOV r7,r7,ASR #1
    @MOV r8,r8,ASR #1
    @MOV r9,r9,ASR #1
    @MOV r10,r10,ASR #1
    @MOV r11,r11,ASR #1

    ADD r4, r4, r8              @x0r = x0r + x2r@
    ADD r5, r5, r9              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    @MOV r4,r4,ASR #1
    @MOV r5,r5,ASR #1
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r2              @x2i = x2i - x3r@
    SUB r10, r8, r11, lsl#1     @x3i = x2r - (x3i << 1)@
    ADD r11, r9, r2, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r10, [r12]             @r10=x3i, r11=x3r@
    SUB r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    SUB r12, r12, r0
    STRD r8, [r12]              @r8=x2r, r9=x2i@
    SUB r12, r12, r0
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE

    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x38]
    LDR r3, [sp, #0x50]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x40]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #0x18]
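@ Each SMULL / LSR #31 / ORR ..., LSL#1 triplet in the loops below forms the
@ upper bits of a 32x32 signed product shifted up by one, i.e. a Q31
@ fractional multiply: (WORD32)(((WORD64)a * b) >> 31).  The helper names in
@ the comments (ixheaacd_mult32, ixheaacd_mac32, mult32x16hin32) appear to
@ refer to the fixed-point primitives of the C reference code.
@
@ Reviewer-added sketch of the twiddle rotation applied per input (MUL31 is a
@ name introduced here for the SMULL sequence, not part of the source):
@
@     #define MUL31(a, b) ((WORD32)(((WORD64)(a) * (WORD64)(b)) >> 31))
@     x1r' = MUL31(x1r, w1l) - MUL31(x1i, w1h);
@     x1i' = MUL31(x1i, w1l) + MUL31(x1r, w1h);
@
@ and likewise for (x2, w2) and (x3, w3); the rotated values then pass through
@ the same add/subtract combine as in LOOP_TRIVIAL_TWIDDLE.  In SECOND_LOOP
@ the twiddle base comes from [sp, #0x2c], the index in r4 (the "j" of the
@ comments) starts at nodespacing and advances by nodespacing per iteration,
@ and [sp, #0x18] (roughly del*nodespacing/3, built from the shift series
@ above) appears to bound the j values for which all three twiddle fetches
@ stay inside the stored table.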
SECOND_LOOP:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4@
    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY:

    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10
    SUB r10, r4, r5

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP
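@ SECOND_LOOP above covers the twiddle indices j (stepped by nodespacing) for
@ which all three fetches w1 = tw[j], w2 = tw[2j] and w3 = tw[3j] fall inside
@ the stored table.  The variants that follow (SECOND_LOOP_2/3/4) appear to
@ handle the ranges where 3j, and later 2j, run past the end of the stored
@ twiddles: the table pointer is stepped back by 2048 bytes (512 words, as the
@ "512 * 4" comments note) before the affected loads, and the matching
@ RADIX4_BFLY_2/3/4 combine steps swap or negate the product terms to
@ compensate for the re-based twiddle values.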
SECOND_LOOP_2:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]

    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1
    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0x18]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4
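@ SECOND_LOOP_3: same butterfly as above; in this range the 2j index has also
@ overrun the stored table, so the pointer is stepped back by 2048 bytes
@ before the w2 fetch (which then covers the w3 fetch as well), and
@ RADIX4_BFLY_3 swaps/negates both the w2 and the w3 product combinations.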
SECOND_LOOP_3:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3
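@ SECOND_LOOP_4: last span of twiddle indices for the stage; the table pointer
@ is stepped back by 2048 bytes before both the w2 and the w3 fetch, and
@ RADIX4_BFLY_4 additionally flips the signs used in the x1/x3 combine.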
SECOND_LOOP_4:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10
    SUB r10, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12]              @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    SUB r7, r7, r11             @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1     @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1      @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4

    LDR r1, [sp, #0x38]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x38]
    LDR r1, [sp, #0x34]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x34]
    LDR r1, [sp, #0x3c]
    SUBS r1, r1, #1
    STR r1, [sp, #0x3c]
    BGT OUTER_LOOP
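@ Final radix-2 stage.  It runs only when the flag saved at [sp, #0x30] is
@ set, i.e. when log2(npoints) is odd and the radix-4 stages leave one factor
@ of 2.  Each butterfly rotates x1 by its twiddle with the same SMULL-based
@ Q31 multiply, then stores (x0 + x1)/2 back at the x0 position and
@ (x0 - x1)/2 at the x1 position.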
RADIX2:
    LDR r1, [sp, #0x30]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x38]
    LDR r1, [sp, #0x44]
    CMP r12, #0
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x50]
    MOV r0, r0, LSL #3          @(del<<1) * 4@
    STR r1, [sp, #0x18]
RADIX2_BFLY:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6 = x0r, r7 = x0i@
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8 = x1r, r9 = x1i@

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)@
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)@
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    SUB r8, r8, r10
    ADD r9, r9, r11

    ADD r10, r8, r6             @(x0r/2) + (x1r/2)@
    ASR r10, r10, #1
    ADD r11, r9, r7             @(x0i/2) + (x1i/2)@
    ASR r11, r11, #1
    SUB r8, r6, r8              @(x0r/2) - (x1r/2)@
    ASR r8, r8, #1
    SUB r9, r7, r9              @(x0i/2) - (x1i/2)@
    ASR r9, r9, #1

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY

    LDR r1, [sp, #0x44]
    MOV r3, r0, ASR #4
    STR r1, [sp, #0x18]
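@ Second half of the radix-2 pass: the twiddle pointer restarts from the table
@ base ([sp, #0x44]) and the product combination below is swapped/negated
@ relative to RADIX2_BFLY, which appears to account for the half of the
@ twiddle circle that is not stored in the table.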
RADIX2_BFLY_2:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6 = x0r, r7 = x0i@
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8 = x1r, r9 = x1i@

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)@
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)@
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)@
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h), x1i, w1l)@
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    ADD r11, r11, r9
    SUB r9, r10, r8
    MOV r8, r11

    ADD r10, r8, r6             @(x0r/2) + (x1r/2)@
    ASR r10, r10, #1
    ADD r11, r9, r7             @(x0i/2) + (x1i/2)@
    ASR r11, r11, #1
    SUB r8, r6, r8              @(x0r/2) - (x1r/2)@
    ASR r8, r8, #1
    SUB r9, r7, r9              @(x0i/2) - (x1i/2)@
    ASR r9, r9, #1

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY_2

EXIT:
    ADD sp, sp, #0x54
    LDMFD sp!, {r4-r12, pc}