//-----------------------------------------------------------------------------
// push_v_regs
//
// Spills x8-x15, x17, x20, x21, x26-x28 and the full 128-bit q0-q3 to a
// 176-byte stack frame (7 * 16 bytes of X pairs + 2 * 32 bytes of Q pairs).
// sp stays 16-byte aligned. Must be paired with pop_v_regs, which expects
// exactly this layout (offsets relative to sp after the macro has run):
//
//     sp +   0 : q0,  q1
//     sp +  32 : q2,  q3
//     sp +  64 : x27, x28
//     sp +  80 : x26, x17
//     sp +  96 : x20, x21
//     sp + 112 : x14, x15
//     sp + 128 : x12, x13
//     sp + 144 : x10, x11
//     sp + 160 : x8,  x9
//
// NOTE(review): under AAPCS64 only x19-x28 (and the low halves of v8-v15)
// are callee-saved; x8-x15, x17 and v0-v3 are volatile, so saving them here
// is conservative rather than required.
//-----------------------------------------------------------------------------
.macro push_v_regs
    sub             sp, sp, #176            // reserve the whole frame at once
    stp             x8, x9, [sp, #160]
    stp             x10, x11, [sp, #144]
    stp             x12, x13, [sp, #128]
    stp             x14, x15, [sp, #112]
    stp             x20, x21, [sp, #96]
    stp             x26, x17, [sp, #80]
    stp             x27, x28, [sp, #64]
    stp             q2, q3, [sp, #32]
    stp             q0, q1, [sp]
.endm
//-----------------------------------------------------------------------------
// pop_v_regs
//
// Reloads the registers spilled by push_v_regs and releases its 176-byte
// frame. Expected layout (offsets relative to sp on entry to the macro):
//
//     sp +   0 : q0,  q1
//     sp +  32 : q2,  q3
//     sp +  64 : x27, x28
//     sp +  80 : x26, x17
//     sp +  96 : x20, x21
//     sp + 112 : x14, x15
//     sp + 128 : x12, x13
//     sp + 144 : x10, x11
//     sp + 160 : x8,  x9
//
// On exit sp has been advanced by 176 bytes, i.e. back to its value before
// the matching push_v_regs.
//-----------------------------------------------------------------------------
.macro pop_v_regs
    ldp             q0, q1, [sp]
    ldp             q2, q3, [sp, #32]
    ldp             x27, x28, [sp, #64]
    ldp             x26, x17, [sp, #80]
    ldp             x20, x21, [sp, #96]
    ldp             x14, x15, [sp, #112]
    ldp             x12, x13, [sp, #128]
    ldp             x10, x11, [sp, #144]
    ldp             x8, x9, [sp, #160]
    add             sp, sp, #176            // drop the whole frame at once
.endm
23
.text
.p2align 2
    .global ixheaacd_shiftrountine_with_rnd

//-----------------------------------------------------------------------------
// ixheaacd_shiftrountine_with_rnd
//
// Target: AArch64, GNU as syntax. Leaf routine (makes no calls).
//
// In:   x0 = pointer to 32-bit words, read forwards and at index j
//            ("qmfReal" below)
//       x1 = pointer to 32-bit words, read forwards and at index j
//            ("qmfImag" below)
//       x2 = pointer to 16-bit output halfwords ("out")
//       w3 = element count n, treated as a signed 32-bit value
//
// Work: out_hi = out + n. For j = n - 1, n - 3, ... while j >= 0:
//           r1 = *qmfReal++      i1 = *qmfImag++
//           r2 = qmfReal[j]      i2 = qmfImag[j]   (indexed from the
//                                                   already advanced pointers)
//           *out++    = rnd(i1 - r1)      out[j]    = rnd(i2 - r2)
//           *out_hi++ = rnd(i2 + r2)      out_hi[j] = rnd(i1 + r1)
//       where the add/sub wrap at 32 bits and
//           rnd(v) = upper 16 bits of sat32(sat32(v << 10) + 0x8000).
//       Nothing is loaded or stored when n - 1 is negative.
//
// Note: the shift amount is the constant 10; w4 is only used as scratch, so
//       any value a caller passes there is ignored.
//
// Clobbers (after pop_v_regs has restored its set): x0-x7, NZCV.
//-----------------------------------------------------------------------------
ixheaacd_shiftrountine_with_rnd:
    push_v_regs

    // out_hi = out + n halfwords. The count is a 32-bit quantity whose upper
    // register bits are unspecified on entry, so widen it explicitly instead
    // of reading the raw 64-bit x3.
    add             x12, x2, w3, sxtw #1
    mov             w9, #0x00008000
    dup             v0.4s, w9               // v0 = rounding constant per lane
    movi            v3.4s, #10              // v3 = left-shift amount per lane
    subs            w3, w3, #1              // j = n - 1 (also clears x3[63:32])
    bmi             S_WITH_R_L6             // nothing to do

S_WITH_R_L5:
    ldr             w5, [x1, x3, lsl #2]    // i2 = qmfImag[j]
    ldr             w7, [x0, x3, lsl #2]    // r2 = qmfReal[j]
    ldr             w14, [x0], #4           // r1 = *qmfReal++
    ldr             w10, [x1], #4           // i1 = *qmfImag++

    add             w6, w5, w7              // i2 + r2
    sub             w5, w5, w7              // i2 - r2
    add             w7, w10, w14            // i1 + r1
    sub             w4, w10, w14            // i1 - r1

    mov             v1.s[0], w4             // lane 0 = i1 - r1
    mov             v1.s[1], w5             // lane 1 = i2 - r2
    mov             v1.s[2], w6             // lane 2 = i2 + r2
    mov             v1.s[3], w7             // lane 3 = i1 + r1
    lsl             w14, w3, #1             // byte offset of halfword j

    sqshl           v1.4s, v1.4s, v3.4s     // saturating << 10
    add             x17, x2, x14            // &out[j] (before out advances)

    sqadd           v2.4s, v1.4s, v0.4s     // saturating + 0x8000

    // Odd halfword lanes are bits [31:16] of each 32-bit lane.
    st1             {v2.h}[1], [x2], #2     // *out++      = rnd(i1 - r1)
    st1             {v2.h}[3], [x17]        // out[j]      = rnd(i2 - r2)
    add             x17, x12, x14           // &out_hi[j] (before out_hi advances)
    st1             {v2.h}[7], [x17]        // out_hi[j]   = rnd(i1 + r1)
    st1             {v2.h}[5], [x12], #2    // *out_hi++   = rnd(i2 + r2)

    subs            x3, x3, #2              // j -= 2
    bge             S_WITH_R_L5             // continue while j >= 0
S_WITH_R_L6:
    pop_v_regs
    ret
74