.text
.p2align 2
.global ixheaacd_mps_complex_fft_64_asm
.type ixheaacd_mps_complex_fft_64_asm, %function

@-----------------------------------------------------------------------------
@ ixheaacd_mps_complex_fft_64_asm
@
@ Fixed-point radix-4 DIT complex FFT kernel (MPS 64-point path).
@ ABI: AAPCS (ARMv7). Register argument roles as used below:
@   r0 = twiddle-factor table pointer (re-read from [sp+0x44] each outer pass)
@   r1 = npoints (transform length; re-read from [sp+0x48])
@   r2 = ptr_x, bit-reversed input read in the first stage
@   r3 = ptr_y, output/work buffer (first stage writes through r3)
@   [sp] on entry = digit-reversal table (LDRB-indexed) — assumed from use;
@                   TODO confirm against the C prototype
@ NOTE(review): argument meanings beyond what the code shows are inferred
@ from the access patterns and the original inline comments — verify.
@
@ Local frame (after STMFD {r0-r12,lr} and SUB sp,#0x44):
@   [sp+0x00..0x14] = w3l,w3h,w2l,w2h,w1l,w1h (current butterfly twiddles)
@   [sp+0x18]       = radix-4 stage count, later reused as 3/8 loop bound
@   [sp+0x24]       = saved twiddle index j
@   [sp+0x2c]       = twiddle base for current outer pass
@   [sp+0x30]       = n & 1 (set when log2(npoints) is odd)
@   [sp+0x34]       = butterflies per twiddle group (npoints>>4, /4 per pass)
@   [sp+0x38]       = nodespacing (starts at 64, /4 per pass)
@   [sp+0x3c]       = outer-loop (stage) counter
@   [sp+0x40]       = del * nodespacing (SECOND_LOOP_2/4 bound)
@   [sp+0x44..0x50] = saved r0..r3 (0x48 = npoints, 0x50 = ptr_y, updated)
@
@ Q31 multiply idiom used throughout:
@   SMULL rLo,rHi,a,b ; LSR rLo,#31 ; ORR rHi,rLo,rHi,LSL #1
@   => rHi = (a*b) >> 31, i.e. bits 62..31 of the 64-bit product.
@-----------------------------------------------------------------------------
ixheaacd_mps_complex_fft_64_asm:
    @LDR r4,[sp]
    STMFD sp!, {r0-r12, lr}
    LDR r4, [sp, #0x38]          @ r4 = stack arg (digit-reversal table); 0x38 = 14 regs just pushed
    SUB sp, sp, #0x44            @ carve local frame (see map above)
    LDR r0, [sp, #0x48]          @ r0 = npoints (saved r1)
    EOR r0, r0, r0, ASR #31      @ one's-complement abs of npoints (sign-safe for CLZ)
    CLZ r0, r0
    SUB r12, r0, #16 @dig_rev_shift = norm32(npoints) + 1 -16@  (r12 is overwritten below)
    SUB r0, r0, #1               @ r0 = norm32(npoints)
    RSB r0, r0, #0x1e            @ r0 = 30 - norm32 = n = log2(npoints)
    AND r1, r0, #1
    STR r1, [sp, #0x30]          @ [sp+0x30] = n & 1 (odd log2 => extra radix-2 stage)
    MOV r1, r0, ASR #1           @ r1 = n >> 1 = number of radix-4 stages
    LDR r0, [sp, #0x48] @npoints
    STR r1, [sp, #0x18]
    MOV lr, r0, LSL #1 @(npoints >>1) * 4  -- byte stride between the 4 legs
    MOV r0, #0                   @ r0 = output byte index through first stage
    MOV r12, r4                  @ r12 = digit-reversal table base

@ Stage 1: trivial-twiddle radix-4 butterflies over digit-reversed input.
@ Reads 4 complex (2-word) points strided npoints/2 apart, writes 4 results
@ contiguously through r3 (STMIA). Loop runs npoints/4 times (r0 += 4 until
@ r0 >= npoints; note lr ASR #1 == npoints).
FIRST_STAGE_R4:
    LDRB r10, [r12, r0, LSR #2]  @ digit-reversed index for this butterfly

    ADD r1, r2, r10, LSL #2      @ r1 = &ptr_x[rev_index]
    LDRD r4, [r1] @r4=x0r, r5=x0i
    ADD r1, r1, lr
    LDRD r8, [r1] @r8=x1r, r9=x1i
    ADD r1, r1, lr
    LDRD r6, [r1] @r6=x2r, r7=x2i
    ADD r1, r1, lr
    LDRD r10, [r1] @r10=x3r, r11=x3i
    ADD r0, r0, #4
    CMP r0, lr, ASR #1           @ flags for the BLT after the stores below

    ADD r4, r4, r6               @x0r = x0r + x2r@
    ADD r5, r5, r7               @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1        @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1        @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10              @x1r = x1r + x3r@
    ADD r9, r9, r11              @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1       @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1      @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8               @x0r = x0r + x1r@
    ADD r5, r5, r9               @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1        @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1        @x1i = x0i - (x1i << 1)
    ADD r6, r6, r11              @x2r = x2r + x3i@
    SUB r7, r7, r1               @x2i = x2i - x3r@
    SUB r10, r6, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r11, r7, r1, lsl#1       @x3r = x2i + (x3r << 1)@

    STMIA r3!, {r4-r11}          @ store x0,x1',x2',x3' (register pairs as loaded)
    BLT FIRST_STAGE_R4

@ Set up the stage loop: nodespacing = 64, in-loop count = npoints>>4,
@ ptr_y rewound to the start of the buffer just written, del (r0) = 4.
    LDR r1, [sp, #0x18]
    LDR r0, [sp, #0x48]
    MOV r12, #0x40 @nodespacing = 64@
    STR r12, [sp, #0x38]
    LDR r12, [sp, #0x48]
    SUB r3, r3, r0, LSL #3       @ rewind r3 by npoints complex (8-byte) elements
    SUBS r1, r1, #1              @ remaining radix-4 stages after stage 1
    STR r3, [sp, #0x50]          @ [sp+0x50] = ptr_y (overwrites saved r3 slot)
    MOV r4, r12, ASR #4
    MOV r0, #4                   @ r0 = del (butterfly span, in complex elements)
    STR r4, [sp, #0x34]
    STR r1, [sp, #0x3c]
    BLE EXIT                     @ nothing left to do for tiny transforms

@ One pass per remaining radix-4 stage. Each pass: the j==0 (twiddle==1)
@ butterflies first, then four twiddled sub-loops, one per quadrant of the
@ twiddle table (the table appears to hold one quadrant of 512 complex
@ entries; the SUB #2048 rewinds step back 512*4 bytes — TODO confirm layout).
OUTER_LOOP:
    LDR r1, [sp, #0x44]          @ twiddle table base (saved r0)
    LDR r12, [sp, #0x50] @WORD32 *data = ptr_y@
    STR r1, [sp, #0x2c]
    LDR r1, [sp, #0x34]          @ r1 = butterflies in this j==0 group

    MOV r0, r0, LSL #3 @(del<<1) * 4  -- r0 now a byte stride between legs
@ j == 0 group: twiddles are (1,0), so no multiplies are needed.
LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0
    LDRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    LDRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    LDRD r10, [r12] @r10=x3r, r11=x3i

@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
@MOV r6,r6,ASR #1
@MOV r7,r7,ASR #1
@MOV r8,r8,ASR #1
@MOV r9,r9,ASR #1
@MOV r10,r10,ASR #1
@MOV r11,r11,ASR #1

    ADD r4, r4, r8               @x0r = x0r + x2r@
    ADD r5, r5, r9               @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10              @x1r = x1r + x3r@
    ADD r7, r7, r11              @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1      @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6               @x0r = x0r + x1r@
    ADD r5, r5, r7               @x0i = x0i + x1i@
@MOV r4,r4,ASR #1
@MOV r5,r5,ASR #1
    SUB r6, r4, r6, lsl #1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1       @x1i = x0i - (x1i << 1)
    ADD r8, r8, r11              @x2r = x2r + x3i@
    SUB r9, r9, r2               @x2i = x2i - x3r@
    SUB r10, r8, r11, lsl#1      @x3i = x2r - (x3i << 1)@
    ADD r11, r9, r2, lsl#1       @x3r = x2i + (x3r << 1)

@ Store back in place, walking r12 back down the 4 legs, then advance
@ to the next butterfly group (4 strides forward).
    STRD r10, [r12] @r10=x3r, r11=x3i
    SUB r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    SUB r12, r12, r0
    STRD r8, [r12] @r8=x2r, r9=x2i
    SUB r12, r12, r0
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE

@ Compute loop bounds for the twiddled sub-loops.
@ [sp+0x40] = del*nodespacing; [sp+0x18] = alternating-series approximation
@ of (del*nodespacing)*3/8, used as the SECOND_LOOP / _3 crossover bound.
    MOV r0, r0, ASR #3           @ r0 back to del (elements)
    LDR r4, [sp, #0x38]          @ r4 = nodespacing = initial twiddle index j
    LDR r3, [sp, #0x50]
    MUL r1, r0, r4
    ADD r12, r3, #8              @ data pointer: second element of ptr_y
    STR r1, [sp, #0x40]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #0x18]
@ First quadrant: w1 at index j, w2 at 2j, w3 at 3j, all read directly.
SECOND_LOOP:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]          @ spill j and the six twiddle words
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

@ Radix-4 butterfly with complex twiddle multiplies (Q31 idiom, see header).
RADIX4_BFLY:

    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1            @ flags consumed by BNE at loop end

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6               @ x1i' = x1i*w1l + x1r*w1h
    SUB r6, r4, r5               @ x1r' = x1r*w1l - x1i*w1h

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8               @ x2i' = x2i*w2l + x2r*w2h
    SUB r8, r4, r5               @ x2r' = x2r*w2l - x2i*w2h

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10            @ x3i' = x3i*w3l + x3r*w3h
    SUB r10, r4, r5              @ x3r' = x3r*w3l - x3i*w3h

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!  @ rewind r12 to leg 0 and load x0r
    LDR r5, [r12, #0x04]         @ x0i

    ADD r4, r8, r4               @x0r = x0r + x2r@
    ADD r5, r9, r5               @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1        @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1        @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10              @x1r = x1r + x3r@
    ADD r7, r7, r11              @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1      @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6               @x0r = x0r + x1r@
    ADD r5, r5, r7               @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1        @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1        @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11              @x2r = x2r + x3i@
    SUB r9, r9, r10              @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1       @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1       @x3r = x2i + (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r4=x3r, r5=x3i (reuses r4/r5)
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3           @ r0 back to del (elements)

@ Advance j by nodespacing; loop while j <= 3/8 bound, then fall through
@ to the next twiddle quadrant. Data pointer steps to the next column.
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP

@ Second quadrant: 3j walks past the table end once, so the third twiddle
@ read rewinds the pointer by 2048 bytes (512 complex words) and the x3
@ combine below uses the sign-adjusted form.
SECOND_LOOP_2:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048 @ 512 *4 -- wrap twiddle pointer back one quadrant
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]

    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1
    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5               @ x1' as in RADIX4_BFLY

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10            @ sign-adjusted x3 combine for wrapped w3
    SUB r11, r5, r4              @ (differs from RADIX4_BFLY on purpose)

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!  @ rewind to leg 0, load x0r
    LDR r5, [r12, #0x04]         @ x0i

    ADD r4, r8, r4               @x0r = x0r + x2r@
    ADD r5, r9, r5               @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1        @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1        @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10              @x1r = x1r + x3r@
    ADD r7, r7, r11              @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1      @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6               @x0r = x0r + x1r@
    ADD r5, r5, r7               @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1        @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1        @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11              @x2r = x2r + x3i@
    SUB r9, r9, r10              @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1       @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1       @x3r = x2i + (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r4=x3r, r5=x3i (reuses r4/r5)
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

@ Loop while j <= (del*nodespacing)/2; if j already past twice the 3/8
@ bound, skip quadrant 3 entirely.
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0x18]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4

@ Third quadrant: 2j and 3j both wrap, so the pointer rewinds after the
@ first twiddle pair; x2 and x3 combines use the sign-adjusted forms.
SECOND_LOOP_3:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048 @ 512 *4 -- wrap twiddle pointer back one quadrant
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8               @ sign-adjusted x2 combine for wrapped w2
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r10, r11, r10            @ sign-adjusted x3 combine for wrapped w3
    SUB r11, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!  @ rewind to leg 0, load x0r
    LDR r5, [r12, #0x04]         @ x0i

    ADD r4, r8, r4               @x0r = x0r + x2r@
    ADD r5, r9, r5               @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1        @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1        @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10              @x1r = x1r + x3r@
    ADD r7, r7, r11              @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1      @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6               @x0r = x0r + x1r@
    ADD r5, r5, r7               @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1        @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1        @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0

    ADD r8, r8, r11              @x2r = x2r + x3i@
    SUB r9, r9, r10              @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1       @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1       @x3r = x2i + (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r4=x3r, r5=x3i (reuses r4/r5)
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

@ Loop while j <= 2 * (3/8 bound).
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3

@ Fourth quadrant: both 2j and 3j reads rewind; x1/x2/x3 combines and the
@ final butterfly use the fully sign-adjusted forms.
SECOND_LOOP_4:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3 @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]! @ w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04] @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048 @ 512 *4 -- wrap twiddle pointer back one quadrant
    LDR r5, [r3, r4, LSL #3]! @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04] @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048 @ 512 *4 -- wrap again for the 3j read
    LDR r7, [r3, r4, LSL #3]! @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04] @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]! @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]! @r8=x2r, r9=x2i
    LDRD r10, [r12, r0] @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2 @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1 @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1 @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2 @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2 @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1 @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1 @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2 @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    ADD r8, r9, r8               @ sign-adjusted x2 combine for wrapped w2
    SUB r9, r5, r4

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2 @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1 @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1 @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2 @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    ADD r11, r11, r10            @ sign-adjusted x3 combine for wrapped w3
    SUB r10, r5, r4

    @SUB r12,r12,r0,lsl #1
    @LDRD r4,[r12] @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!  @ rewind to leg 0, load x0r
    LDR r5, [r12, #0x04]         @ x0i

    ADD r4, r8, r4               @x0r = x0r + x2r@
    ADD r5, r9, r5               @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1        @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1        @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10              @x1r = x1r + x3r@
    SUB r7, r7, r11              @x1i = x1i - x3i@  (sign flipped vs other bflys)
    SUB r10, r6, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1      @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6               @x0r = x0r + x1r@
    ADD r5, r5, r7               @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1        @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1        @x1i = x0i - (x1i << 1)
    STRD r4, [r12] @r4=x0r, r5=x0i
    ADD r12, r12, r0
    ADD r8, r8, r11              @x2r = x2r + x3i@
    SUB r9, r9, r10              @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl#1       @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl#1       @x3r = x2i + (x3r << 1)

    STRD r8, [r12] @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12] @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12] @r4=x3r, r5=x3i (reuses r4/r5)
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

@ Loop while j < del*nodespacing, then finish this stage.
    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4

@ Next stage: del *= 4, nodespacing /= 4, in-loop count /= 4.
    LDR r1, [sp, #0x38]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x38]
    LDR r1, [sp, #0x34]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x34]
    LDR r1, [sp, #0x3c]
    SUBS r1, r1, #1
    STR r1, [sp, #0x3c]
    BGT OUTER_LOOP

@ Odd-log2 tail: set up for a final radix-2 stage. For the 64-point case
@ (log2 = 6, even) the flag at [sp+0x30] is 0 and this block is skipped.
@ NOTE(review): the setup below falls straight through to EXIT with no
@ radix-2 butterfly body — it appears vestigial/truncated; confirm against
@ the upstream source before relying on non-power-of-4 sizes.
    LDR r1, [sp, #0x30]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x38]
    LDR r1, [sp, #0x44]
    CMP r12, #0
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x50]
    MOV r0, r0, LSL #3 @(del<<1) * 4
    STR r1, [sp, #0x18]

EXIT:
    ADD sp, sp, #0x54            @ drop locals (0x44) + saved r0-r3 (0x10)
    LDMFD sp!, {r4-r12, pc}      @ restore callee-saved regs and return