.text
.p2align 2
.global ixheaacd_complex_ifft_p2_asm

ixheaacd_complex_ifft_p2_asm:
    STMFD sp!, {r0-r12, lr}
    SUB sp, sp, #0x44
    LDR r0, [sp, #0x48]
    EOR r0, r0, r0, ASR #31
    CLZ r0, r0
    SUB r12, r0, #16            @dig_rev_shift = norm32(npoints) + 1 - 16@
    SUB r0, r0, #1
    RSB r0, r0, #0x1e
    AND r1, r0, #1
    STR r1, [sp, #0x30]
    MOV r1, r0, ASR #1
    LDR r0, [sp, #0x48]         @npoints
    STR r1, [sp, #0x18]
    MOV lr, r0, LSL #1          @(npoints >> 1) * 4
    MOV r0, #0

FIRST_STAGE_R4:
    MOVW r4, #0x3333
    MOVT r4, #0x3333
    MOVW r5, #0x0F0F
    MOVT r5, #0x0F0F
    AND r6, r4, r0
    AND r7, r4, r0, LSR #2
    ORR r4, r7, r6, LSL #2
    AND r6, r5, r4
    AND r7, r5, r4, LSR #4
    ORR r4, r7, r6, LSL #4
    BIC r6, r4, #0x0000FF00
    BIC r7, r4, #0x00FF0000
    MOV r7, r7, LSR #8
    ORR r4, r7, r6, LSL #8
    LDR r5, [sp, #0x30]
    MOV r10, r4, LSR r12
    CMP r5, #0
    ADDNE r10, r10, #1
    BICNE r10, r10, #1

    ADD r1, r2, r10, LSL #2
    LDRD r4, [r1]               @r4=x0r, r5=x0i
    ADD r1, r1, lr
    LDRD r8, [r1]               @r8=x1r, r9=x1i
    ADD r1, r1, lr
    LDRD r6, [r1]               @r6=x2r, r7=x2i
    ADD r1, r1, lr
    LDRD r10, [r1]              @r10=x3r, r11=x3i
    ADD r0, r0, #4
    CMP r0, lr, ASR #1

    ADD r4, r4, r6              @x0r = x0r + x2r@
    ADD r5, r5, r7              @x0i = x0i + x2i@
    SUB r6, r4, r6, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r7, r5, r7, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r8, r8, r10             @x1r = x1r + x3r@
    ADD r9, r9, r11             @x1i = x1i + x3i@
    SUB r1, r8, r10, lsl#1      @x3r = x1r - (x3r << 1)@
    SUB r11, r9, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r8              @x0r = x0r + x1r@
    ADD r5, r5, r9              @x0i = x0i + x1i@
    SUB r8, r4, r8, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r9, r5, r9, lsl#1       @x1i = x0i - (x1i << 1)@
    SUB r6, r6, r11             @x2r = x2r - x3i@
    ADD r7, r7, r1              @x2i = x2i + x3r@
    ADD r10, r6, r11, lsl#1     @x3i = x2r + (x3i << 1)@
    SUB r11, r7, r1, lsl#1      @x3r = x2i - (x3r << 1)@

    STMIA r3!, {r4-r11}
    BLT FIRST_STAGE_R4
    LDR r1, [sp, #0x18]
    LDR r0, [sp, #0x48]
    MOV r12, #0x40              @nodespacing = 64@
    STR r12, [sp, #0x38]
    LDR r12, [sp, #0x48]
    SUB r3, r3, r0, LSL #3
    SUBS r1, r1, #1
    STR r3, [sp, #0x50]
    MOV r4, r12, ASR #4
    MOV r0, #4
    STR r4, [sp, #0x34]
    STR r1, [sp, #0x3c]
    BLE RADIX2
OUTER_LOOP:
    LDR r1, [sp, #0x44]
    LDR r12, [sp, #0x50]        @WORD32 *data = ptr_y@
    STR r1, [sp, #0x2c]
    LDR r1, [sp, #0x34]

    MOV r0, r0, LSL #3          @(del<<1) * 4
LOOP_TRIVIAL_TWIDDLE:
    LDRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0
    LDRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    LDRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    LDRD r10, [r12]             @r10=x3r, r11=x3i

    @MOV r4, r4, ASR #1
    @MOV r5, r5, ASR #1
    @MOV r6, r6, ASR #1
    @MOV r7, r7, ASR #1
    @MOV r8, r8, ASR #1
    @MOV r9, r9, ASR #1
    @MOV r10, r10, ASR #1
    @MOV r11, r11, ASR #1

    ADD r4, r4, r8              @x0r = x0r + x2r@
    ADD r5, r5, r9              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl #1      @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl #1      @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r2, r6, r10, lsl #1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl #1    @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    @MOV r4, r4, ASR #1
    @MOV r5, r5, ASR #1
    SUB r6, r4, r6, lsl #1      @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl #1      @x1i = x0i - (x1i << 1)@
    SUB r8, r8, r11             @x2r = x2r - x3i@
    ADD r9, r9, r2              @x2i = x2i + x3r@
    ADD r10, r8, r11, lsl#1     @x3i = x2r + (x3i << 1)@
    SUB r11, r9, r2, lsl#1      @x3r = x2i - (x3r << 1)@
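    @ Write the four recombined outputs back into this butterfly group, stepping
    @ r12 back down through the slots, then advance r12 past the group for the
    @ next iteration.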

    STRD r10, [r12]             @r10=x3r, r11=x3i
    SUB r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    SUB r12, r12, r0
    STRD r8, [r12]              @r8=x2r, r9=x2i
    SUB r12, r12, r0
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0, lsl #2

    SUBS r1, r1, #1
    BNE LOOP_TRIVIAL_TWIDDLE

    MOV r0, r0, ASR #3
    LDR r4, [sp, #0x38]
    LDR r3, [sp, #0x50]
    MUL r1, r0, r4
    ADD r12, r3, #8
    STR r1, [sp, #0x40]
    MOV r3, r1, ASR #2
    ADD r3, r3, r1, ASR #3
    SUB r3, r3, r1, ASR #4
    ADD r3, r3, r1, ASR #5
    SUB r3, r3, r1, ASR #6
    ADD r3, r3, r1, ASR #7
    SUB r3, r3, r1, ASR #8
    STR r3, [sp, #0x18]
SECOND_LOOP:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5              @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5              @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r4, r5             @

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i
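    @ Step r12 back to the x0 element of this butterfly group and reload it
    @ (r4 = x0r, r5 = x0i); this replaces the two commented-out instructions above.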
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11             @x2r = x2r - x3i@
    ADD r9, r9, r10             @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1      @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1      @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0

    BNE RADIX4_BFLY
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7
    BLE SECOND_LOOP

SECOND_LOOP_2:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]

    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]
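    @ Radix-4 butterflies for the second band of twiddle indices. The w3 pair was
    @ fetched after rewinding the table pointer by 512 words (SUB r3, r3, #2048
    @ above); the recombination below appears to compensate by swapping the
    @ real/imaginary roles of the w3 product.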

RADIX4_BFLY_2:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1
    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5              @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r9, r9, r8
    ADD r8, r4, r5              @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4             @

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11             @x2r = x2r - x3i@
    ADD r9, r9, r10             @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1      @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1      @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0

    BNE RADIX4_BFLY_2
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7, ASR #1
    BLE SECOND_LOOP_2
    LDR r7, [sp, #0x18]
    CMP r4, r7, LSL #1
    BGT SECOND_LOOP_4
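    @ Third band of twiddle indices: the table pointer is rewound by 512 words once
    @ before the w2 and w3 fetches below, and both the w2 and w3 products in
    @ RADIX4_BFLY_3 appear to be recombined with their real/imaginary roles swapped
    @ relative to RADIX4_BFLY.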

SECOND_LOOP_3:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_3:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5              @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4              @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r10, r10, r11
    ADD r11, r5, r4             @

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    ADD r7, r7, r11             @x1i = x1i + x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    SUB r11, r7, r11, lsl#1     @x3i = x1i - (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11             @x2r = x2r - x3i@
    ADD r9, r9, r10             @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1      @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1      @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0

    BNE RADIX4_BFLY_3
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x18]
    ADD r4, r4, r6
    CMP r4, r7, LSL #1
    BLE SECOND_LOOP_3
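    @ Final band of twiddle indices: the table pointer is rewound before both the
    @ w2 and w3 fetches below, and RADIX4_BFLY_4 appears to compensate with a
    @ negated x3 product (RSB) and changed x1i/x3i signs in the recombination.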

SECOND_LOOP_4:
    LDR r3, [sp, #0x2c]
    LDR r14, [sp, #0x34]
    MOV r0, r0, LSL #3          @(del<<1) * 4

    LDR r1, [r3, r4, LSL #3]!   @w1h = *(twiddles + 2*j)@
    LDR r2, [r3, #0x04]         @w1l = *(twiddles + 2*j + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r5, [r3, r4, LSL #3]!   @w2h = *(twiddles + 2*(j<<1))@
    LDR r6, [r3, #0x04]         @w2l = *(twiddles + 2*(j<<1) + 1)@
    SUB r3, r3, #2048           @512 * 4
    LDR r7, [r3, r4, LSL #3]!   @w3h = *(twiddles + 2*j + 2*(j<<1))@
    LDR r8, [r3, #0x04]         @w3l = *(twiddles + 2*j + 2*(j<<1) + 1)@

    STR r4, [sp, #0x24]
    STR r1, [sp, #0x14]
    STR r2, [sp, #0x10]
    STR r5, [sp, #0x0c]
    STR r6, [sp, #0x08]
    STR r7, [sp, #0x04]
    STR r8, [sp]

RADIX4_BFLY_4:
    LDRD r6, [r12, r0]!         @r6=x1r, r7=x1i
    LDRD r8, [r12, r0]!         @r8=x2r, r9=x2i
    LDRD r10, [r12, r0]         @r10=x3r, r11=x3i
    SUBS r14, r14, #1

    LDR r1, [sp, #0x14]
    LDR r2, [sp, #0x10]

    SMULL r3, r4, r6, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r6, r6, r1        @mult32x16hin32(x1r,W1h)
    LSR r3, r3, #31
    ORR r6, r3, r6, LSL#1
    SMULL r3, r5, r7, r1        @mult32x16hin32(x1i,W1h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r7, r7, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r3, r3, #31
    ORR r7, r3, r7, LSL#1
    SUB r7, r7, r6
    ADD r6, r4, r5              @

    LDR r1, [sp, #0x0c]
    LDR r2, [sp, #0x08]

    SMULL r3, r4, r8, r2        @ixheaacd_mult32(x2r,w2l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r8, r8, r1        @mult32x16hin32(x2r,W2h)
    LSR r3, r3, #31
    ORR r8, r3, r8, LSL#1
    SMULL r3, r5, r9, r1        @mult32x16hin32(x2i,W2h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x2r,w2h) ,x2i,w2l)
    LSR r3, r3, #31
    ORR r9, r3, r9, LSL#1
    SUB r8, r8, r9
    ADD r9, r5, r4              @

    LDR r1, [sp, #0x04]
    LDR r2, [sp]

    SMULL r3, r4, r10, r2       @ixheaacd_mult32(x3r,w3l)
    LSR r3, r3, #31
    ORR r4, r3, r4, LSL#1
    SMULL r3, r10, r10, r1      @mult32x16hin32(x3r,W3h)
    LSR r3, r3, #31
    ORR r10, r3, r10, LSL#1
    SMULL r3, r5, r11, r1       @mult32x16hin32(x3i,W3h)
    LSR r3, r3, #31
    ORR r5, r3, r5, LSL#1
    SMULL r3, r11, r11, r2      @ixheaacd_mac32(ixheaacd_mult32(x3r,w3h) ,x3i,w3l)
    LSR r3, r3, #31
    ORR r11, r3, r11, LSL#1
    SUB r11, r11, r10
    ADD r10, r5, r4             @
    RSB r10, r10, #0

    @SUB r12, r12, r0, lsl #1
    @LDRD r4, [r12]             @r4=x0r, r5=x0i
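    @ Step r12 back to x0 of this butterfly group and reload it before the final
    @ recombination; note the x1i/x3i sign handling below differs from the earlier
    @ butterfly loops.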
    LDR r4, [r12, -r0, lsl #1]!
    LDR r5, [r12, #0x04]

    ADD r4, r8, r4              @x0r = x0r + x2r@
    ADD r5, r9, r5              @x0i = x0i + x2i@
    SUB r8, r4, r8, lsl#1       @x2r = x0r - (x2r << 1)@
    SUB r9, r5, r9, lsl#1       @x2i = x0i - (x2i << 1)@
    ADD r6, r6, r10             @x1r = x1r + x3r@
    SUB r7, r7, r11             @x1i = x1i - x3i@
    SUB r10, r6, r10, lsl#1     @x3r = x1r - (x3r << 1)@
    ADD r11, r7, r11, lsl#1     @x3i = x1i + (x3i << 1)@

    ADD r4, r4, r6              @x0r = x0r + x1r@
    ADD r5, r5, r7              @x0i = x0i + x1i@
    SUB r6, r4, r6, lsl#1       @x1r = x0r - (x1r << 1)@
    SUB r7, r5, r7, lsl#1       @x1i = x0i - (x1i << 1)@
    STRD r4, [r12]              @r4=x0r, r5=x0i
    ADD r12, r12, r0

    SUB r8, r8, r11             @x2r = x2r - x3i@
    ADD r9, r9, r10             @x2i = x2i + x3r@
    ADD r4, r8, r11, lsl#1      @x3i = x2r + (x3i << 1)@
    SUB r5, r9, r10, lsl#1      @x3r = x2i - (x3r << 1)@

    STRD r8, [r12]              @r8=x2r, r9=x2i
    ADD r12, r12, r0
    STRD r6, [r12]              @r6=x1r, r7=x1i
    ADD r12, r12, r0
    STRD r4, [r12]              @r4=x3i, r5=x3r
    ADD r12, r12, r0

    BNE RADIX4_BFLY_4
    MOV r0, r0, ASR #3

    LDR r1, [sp, #0x48]
    LDR r4, [sp, #0x24]
    SUB r1, r12, r1, LSL #3
    LDR r6, [sp, #0x38]
    ADD r12, r1, #8
    LDR r7, [sp, #0x40]
    ADD r4, r4, r6
    CMP r4, r7
    BLT SECOND_LOOP_4

    LDR r1, [sp, #0x38]
    MOV r0, r0, LSL #2
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x38]
    LDR r1, [sp, #0x34]
    MOV r1, r1, ASR #2
    STR r1, [sp, #0x34]
    LDR r1, [sp, #0x3c]
    SUBS r1, r1, #1
    STR r1, [sp, #0x3c]
    BGT OUTER_LOOP

RADIX2:
    LDR r1, [sp, #0x30]
    CMP r1, #0
    BEQ EXIT
    LDR r12, [sp, #0x38]
    LDR r1, [sp, #0x44]
    CMP r12, #0
    MOVEQ r4, #1
    MOVNE r4, r12, LSL #1
    MOVS r3, r0
    BEQ EXIT

    MOV r3, r3, ASR #1
    LDR r5, [sp, #0x50]
    MOV r0, r0, LSL #3          @(del<<1) * 4
    STR r1, [sp, #0x18]
RADIX2_BFLY:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6=x0r, r7=x0i
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8=x1r, r9=x1i

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    ADD r8, r8, r10
    SUB r9, r9, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6             @(x0r/2) + (x1r/2)
    ADD r11, r9, r7             @(x0i/2) + (x1i/2)@
    SUB r8, r6, r8              @(x0r/2) - (x1r/2)
    SUB r9, r7, r9              @(x0i/2) - (x1i/2)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY

    LDR r1, [sp, #0x44]
    MOV r3, r0, ASR #4
    STR r1, [sp, #0x18]
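    @ Second half of the radix-2 stage: the same butterfly as RADIX2_BFLY, but the
    @ twiddled x1 product is combined with swapped/negated real and imaginary
    @ parts (see the SUB/ADD/MOV sequence after the multiplies).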
RADIX2_BFLY_2:
    LDR r1, [sp, #0x18]
    LDRD r6, [r5]               @r6=x0r, r7=x0i
    ADD r5, r5, r0
    LDRD r8, [r5]               @r8=x1r, r9=x1i

    LDR r2, [r1]
    SUBS r3, r3, #1

    SMULL r1, r11, r8, r2       @mult32x16hin32(x1r,W1h)
    LSR r1, r1, #31
    ORR r11, r1, r11, LSL#1
    SMULL r1, r10, r9, r2       @mult32x16hin32(x1i,W1h)
    LSR r1, r1, #31
    ORR r10, r1, r10, LSL#1

    LDR r1, [sp, #0x18]
    LDR r2, [r1, #0x04]
    ADD r1, r1, r4, LSL #3
    STR r1, [sp, #0x18]

    SMULL r1, r8, r8, r2        @ixheaacd_mult32(x1r,w1l)
    LSR r1, r1, #31
    ORR r8, r1, r8, LSL#1
    SMULL r1, r9, r9, r2        @ixheaacd_mac32(ixheaacd_mult32(x1r,w1h) ,x1i,w1l)
    LSR r1, r1, #31
    ORR r9, r1, r9, LSL#1

    SUB r11, r11, r9
    ADD r9, r10, r8             @
    MOV r8, r11

    ASR r8, r8, #1
    ASR r6, r6, #1
    ASR r9, r9, #1
    ASR r7, r7, #1
    ADD r10, r8, r6             @(x0r>>1) + (x1r>>1)
    ADD r11, r9, r7             @(x0i>>1) + (x1i>>1)@
    SUB r8, r6, r8              @(x0r>>1) - (x1r>>1)
    SUB r9, r7, r9              @(x0i>>1) - (x1i>>1)@

    STRD r8, [r5]
    SUB r5, r5, r0
    STRD r10, [r5], #8

    BNE RADIX2_BFLY_2

EXIT:
    ADD sp, sp, #0x54
    LDMFD sp!, {r4-r12, pc}