.text
.p2align 2
.global ixheaacd_mps_complex_fft_64_asm

ixheaacd_mps_complex_fft_64_asm:
    @LDR r4, [sp]
    STMFD sp!, {r0-r12, lr}
    LDR r4, [sp, #0x38]
    SUB sp, sp, #0x28
@   LDR r4, [sp, #0x30]
    LDR r0, [sp, #0x2c]
    @LDR r12, [sp, #0x5c+4]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16            @dig_rev_shift = norm32(npoints) + 1 - 16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x14]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x2c]         @npoints@
    STR r1, [sp, #-4]!
    MOV lr, r0, LSL #1          @(npoints >> 1) * 4@
    MOV r0, #0
    MOV r12, r4

FIRST_STAGE_R4:
    LDRB r10, [r12, r0, LSR #2]

    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1]               @r4=x0r, r5=x0i@
    ADD r1, r1, lr
    LDRD r8, [r1]               @r8=x1r, r9=x1i@
    ADD r1, r1, lr
    LDRD r6, [r1]               @r6=x2r, r7=x2i@
    ADD r1, r1, lr
    LDRD r10, [r1]              @r10=x3r, r11=x3i@
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6              @x0r = x0r + x2r@
    ADD r5, r5, r7              @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10             @x1r = x1r + x3r@
    ADD r9, r9, r11             @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl #1     @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8              @x0r = x0r + x1r@
    ADD r5, r5, r9              @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl #1      @x1i = x0i - (x1i << 1)@
    ADD r6, r6, r11             @x2r = x2r + x3i@
    SUB r7, r7, r1              @x2i = x2i - x3r@
    SUB r10, r6, r11, lsl #1    @x3i = x2r - (x3i << 1)@
    ADD r11, r7, r1, lsl #1     @x3r = x2i + (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4
    LDR r1, [sp], #4
    LDR r0, [sp, #0x2c]
    MOV r12, #0x40              @nodespacing = 64@
    STR r12, [sp, #0x1c]
    LDR r12, [sp, #0x2c]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x34]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x18]
    STR r1, [sp, #0x20]
    BLE EXIT

OUTER_LOOP:
    LDR r1, [sp, #0x28]
    LDR r12, [sp, #0x34]        @WORD32 *data = ptr_y@
    STR r1, [sp, #0x10]
    LDR r1, [sp, #0x18]

    MOV r0, r0, LSL #3          @(del << 1) * 4@

LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0
    LDRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    LDRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    LDRD r10, [r12]             @r10=x3r, r11=x3i@

@MOV r4, r4, ASR #1
@MOV r5, r5, ASR #1
@MOV r6, r6, ASR #1
@MOV r7, r7, ASR #1
@MOV r8, r8, ASR #1
@MOV r9, r9, ASR #1
@MOV r10, r10, ASR #1
@MOV r11, r11, ASR #1

    ADD r4, r4, r8              @x0r = x0r + x2r@
    ADD r5, r5, r9              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
@MOV r4, r4, ASR #1
@MOV r5, r5, ASR #1
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r2              @x2i = x2i - x3r@
    SUB r10, r8, r11, lsl #1    @x3i = x2r - (x3i << 1)@
    ADD r11, r9, r2, lsl #1     @x3r = x2i + (x3r << 1)@

    STRD r10, [r12]             @r10=x3i, r11=x3r@
    SUB r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    SUB r12, r12, r0
    STRD r8, [r12]              @r8=x2r, r9=x2i@
    SUB r12, r12, r0
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE
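@ Per-stage setup for the twiddled butterflies. r1 = del * nodespacing;
@ the shift/add chain below evaluates r1 * (1/4 + 1/8 - 1/16 + 1/32 -
@ 1/64 + 1/128 - 1/256) ~= r1/3 and pushes it on the stack. The loop
@ exit tests further down compare the running twiddle index j (r4)
@ against r1/3, r1/2, 2*r1/3 and r1, which is what splits the work
@ across SECOND_LOOP .. SECOND_LOOP_4.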
    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x1c]
    LDR r3, [sp, #0x34]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x24]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #-4]!

SECOND_LOOP:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del << 1) * 4@
    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r, w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r, w1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL #1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i, w1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL #1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r, w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r, w2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL #1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i, w2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL #1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r, w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r, w3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL #1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i, w3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL #1
    ADD r11, r11, r10
    SUB r10, r4, r5
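@ Each SMULL / LSR #31 / ORR ..., LSL #1 triple above is a Q31 fractional
@ multiply: it keeps bits 62..31 of the 64-bit product, i.e. (a * b) >> 31.
@ x1, x2 and x3 are now rotated by w1, w2 and w3; rewind r12 by two
@ strides to reach x0, then apply the radix-4 butterfly.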
    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]! @r4=x0r@
    LDR r5, [r12, #4]           @r5=x0i@

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl #1    @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl #1     @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl #1     @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP

SECOND_LOOP_2:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del << 1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]

    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]
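@ RADIX4_BFLY_2 handles del*nodespacing/3 < j <= del*nodespacing/2, the
@ range for which the w3 lookup above is pulled back by 2048 bytes
@ (512 words). The x3 combine uses the exchanged form to match:
@ ADD r10, r11, r10 / SUB r11, r5, r4 below, versus
@ ADD r11, r11, r10 / SUB r10, r4, r5 in RADIX4_BFLY.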
RADIX4_BFLY_2:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r, w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r, w1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL #1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i, w1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL #1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r, w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r, w2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL #1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i, w2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL #1
    ADD r9, r9, r8
    SUB r8, r4, r5

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r, w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r, w3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL #1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i, w3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL #1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]! @r4=x0r@
    LDR r5, [r12, #4]           @r5=x0i@

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl #1    @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl #1     @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl #1     @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0x24+4]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4
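@ SECOND_LOOP_3 handles del*nodespacing/2 < j <= 2*del*nodespacing/3; the
@ table pull-back now happens before the w2 load, and RADIX4_BFLY_3 uses
@ the exchanged combine for both x2 (ADD r8, r9, r8 / SUB r9, r5, r4)
@ and x3 (ADD r10, r11, r10 / SUB r11, r5, r4).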
SECOND_LOOP_3:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del << 1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r, w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r, w1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL #1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i, w1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL #1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r, w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r, w2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL #1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i, w2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL #1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r, w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r, w3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL #1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i, w3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL #1
    ADD r10, r11, r10
    SUB r11, r5, r4

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]! @r4=x0r@
    LDR r5, [r12, #4]           @r5=x0i@

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl #1    @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0

    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl #1     @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl #1     @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3
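@ SECOND_LOOP_4 handles the remaining j up to del*nodespacing; here the
@ table pointer is pulled back both after the w1 pair and after the w2
@ pair.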
SECOND_LOOP_4:
    LDR r3, [sp, #0x10+4]
    LDR r14, [sp, #0x18+4]
    MOV r0, r0, LSL #3          @(del << 1) * 4@

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #4]            @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #4]            @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #4]            @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #8+4]
    STR r1, [sp, #-4]
    STR r2, [sp, #-8]
    STR r5, [sp, #-12]
    STR r6, [sp, #-16]
    STR r7, [sp, #-20]
    STR r8, [sp, #-24]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i@
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i@
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i@
    SUBS r14, r14, #1

    LDR r1, [sp, #-4]
    LDR r2, [sp, #-8]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r, w1l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r, w1h)@
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL #1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i, w1h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l)@
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL #1
    ADD r7, r7, r6
    SUB r6, r4, r5

    LDR r1, [sp, #-12]
    LDR r2, [sp, #-16]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r, w2l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r, w2h)@
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL #1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i, w2h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l)@
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL #1
    ADD r8, r9, r8
    SUB r9, r5, r4

    LDR r1, [sp, #-20]
    LDR r2, [sp, #-24]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r, w3l)@
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL #1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r, w3h)@
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL #1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i, w3h)@
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL #1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l)@
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL #1
    ADD r11, r11, r10
    SUB r10, r5, r4
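@ RADIX4_BFLY_4 differs from the earlier butterflies in its sign
@ handling: the rotated x3 term is combined as SUB r10, r5, r4 above,
@ and the core below uses x1i = x1i - x3i / x3i = x1i + (x3i << 1)
@ instead of the +/- pattern of RADIX4_BFLY.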
    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i@
    LDR r4, [r12, -r0, lsl #1]! @r4=x0r@
    LDR r5, [r12, #4]           @r5=x0i@

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    SUB r7, r7, r11             @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl #1    @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl #1    @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i@
    ADD r12, r12, r0
    ADD r8, r8, r11             @x2r = x2r + x3i@
    SUB r9, r9, r10             @x2i = x2i - x3r@
    SUB r4, r8, r11, lsl #1     @x3i = x2r - (x3i << 1)@
    ADD r5, r9, r10, lsl #1     @x3r = x2i + (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i@
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i@
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r@
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x2c+4]
    LDR r4, [sp, #8+4]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x1c+4]
    ADD r12, r1, #8
    LDR r7, [sp, #0x24+4]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4
    ADD sp, sp, #4

    LDR r1, [sp, #0x1c]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x1c]
    LDR r1, [sp, #0x18]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x18]
    LDR r1, [sp, #0x20]
    SUBS r1, r1, #1
    STR r1, [sp, #0x20]
    BGT OUTER_LOOP

    @ Tail for npoints that are not a power of 4 (flag at [sp, #0x14]);
    @ for the 64-point case the flag is zero, so this block is skipped.
    LDR r1, [sp, #0x14]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x1c]
    LDR r1, [sp, #0x28]
    CMP r12, #0
    LDRNE r12, [sp, #0x1c]
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del << 1) * 4@
    STR r1, [sp, #-4]

EXIT:
    ADD sp, sp, #0x38
    LDMFD sp!, {r4-r12, pc}