// push_v_regs
//
// Prologue helper: reserves one 160-byte frame (a multiple of 16, so sp
// alignment is unchanged) and spills ten register pairs into it.
//
// Frame layout after the macro, relative to the new sp:
//   [sp, #144]  d8,  d9      low 64 bits of v8-v15 are callee-saved
//   [sp, #128]  d10, d11
//   [sp, #112]  d12, d13
//   [sp, #96]   d14, d15
//   [sp, #80]   x8,  x9      x8-x17 are saved here as well
//   [sp, #64]   x10, x11
//   [sp, #48]   x12, x13
//   [sp, #32]   x14, x15
//   [sp, #16]   x16, x17
//   [sp, #0]    x29, x30     frame pointer / link register
//
// pop_v_regs expects exactly this layout.
.macro push_v_regs
    sub             sp, sp, #160
    stp             d8, d9, [sp, #144]
    stp             d10, d11, [sp, #128]
    stp             d12, d13, [sp, #112]
    stp             d14, d15, [sp, #96]
    stp             x8, x9, [sp, #80]
    stp             x10, x11, [sp, #64]
    stp             x12, x13, [sp, #48]
    stp             x14, x15, [sp, #32]
    stp             x16, x17, [sp, #16]
    stp             x29, x30, [sp]
.endm
// pop_v_regs
//
// Epilogue helper: reloads the ten register pairs stored by push_v_regs
// and releases the 160-byte frame.
//
// Expected frame layout on entry, relative to sp:
//   [sp, #0]    x29, x30
//   [sp, #16]   x16, x17
//   [sp, #32]   x14, x15
//   [sp, #48]   x12, x13
//   [sp, #64]   x10, x11
//   [sp, #80]   x8,  x9
//   [sp, #96]   d14, d15
//   [sp, #112]  d12, d13
//   [sp, #128]  d10, d11
//   [sp, #144]  d8,  d9
//
// On exit sp is 160 bytes higher than on entry.
.macro pop_v_regs
    ldp             x29, x30, [sp]
    ldp             x16, x17, [sp, #16]
    ldp             x14, x15, [sp, #32]
    ldp             x12, x13, [sp, #48]
    ldp             x10, x11, [sp, #64]
    ldp             x8, x9, [sp, #80]
    ldp             d14, d15, [sp, #96]
    ldp             d12, d13, [sp, #112]
    ldp             d10, d11, [sp, #128]
    ldp             d8, d9, [sp, #144]
    add             sp, sp, #160
.endm
25
.text
.p2align 2
    .global ixheaacd_sbr_qmfanal32_winadds

//-----------------------------------------------------------------------
// ixheaacd_sbr_qmfanal32_winadds
//
// Target: AArch64, GNU as syntax, AAPCS64 argument registers.
//
// Register roles on entry, as used by the code below. The names for
// x4-x7 come from the comments of the earlier 32-bit port; the C
// prototype is not visible in this file - confirm against the caller.
//   x0 : 16-bit data set A
//   x1 : 16-bit data set B
//   x2 : 16-bit table A (only even-indexed halfwords are used)
//   x3 : 16-bit table B (only even-indexed halfwords are used)
//   x4 : winAdd       - 64 x 32-bit results are written here
//   x5 : filterStates - 32 halfwords are written here
//   x6 : timeIn       - 32 halfwords are read from here
//   w7 : stride       - distance between timeIn samples, in halfwords;
//                       treated as a signed 32-bit value
//
// Step 1 (LOOP):
//   filterStates[31 - i] = timeIn[i * stride]            for i = 0..31
//
// Step 2 (SKIP_LOOP / LOOP_1), for each group g = 0..7 of four lanes:
//   winAdd[4g .. 4g+3]       = sum over k = 0..4 of
//        A[4g + 64k + lane] * tableA[2 * (4g + 128k + lane)]
//   winAdd[32+4g .. 32+4g+3] = same expression with data B / table B
//   Products are signed 16 x 16 -> 32 bit (smull / smlal).
//
// Saved and restored around the body: x8-x17, x29, x30, d8-d15 (by
// push_v_regs / pop_v_regs) and x19, x20. x0-x7, v0-v7, v16-v20, v30,
// the upper halves of v8-v15 and the flags are modified.
//-----------------------------------------------------------------------
ixheaacd_sbr_qmfanal32_winadds:

    push_v_regs
    stp             x19, x20, [sp, #-16]!

    // stride arrives as a 32-bit value in w7; the upper half of x7 is
    // not guaranteed by the calling convention, so sign-extend before
    // it is used in 64-bit address arithmetic.
    sxtw            x9, w7
    lsl             x9, x9, #1              // x9 = byte step between timeIn samples

    mov             x20, x4                 // keep winAdd: w4 is scratch in LOOP
    add             x5, x5, #64             // one past filterStates[31]
    mov             w10, #3                 // 4 passes (3,2,1,0) x 8 samples = 32

    // ---- Step 1: strided gather from timeIn, stored in reverse order ----
LOOP:
    ldrsh           w4, [x6]
    add             x6, x6, x9
    ldrsh           w8, [x6]
    add             x6, x6, x9
    ldrsh           w11, [x6]
    add             x6, x6, x9
    ldrsh           w12, [x6]
    add             x6, x6, x9

    strh            w4, [x5, #-2]!
    strh            w8, [x5, #-2]!
    strh            w11, [x5, #-2]!
    strh            w12, [x5, #-2]!

    ldrsh           w4, [x6]
    add             x6, x6, x9
    ldrsh           w8, [x6]
    add             x6, x6, x9
    ldrsh           w11, [x6]
    add             x6, x6, x9
    ldrsh           w12, [x6]
    add             x6, x6, x9

    strh            w4, [x5, #-2]!
    strh            w8, [x5, #-2]!
    strh            w11, [x5, #-2]!
    strh            w12, [x5, #-2]!

    subs            w10, w10, #1
    bpl             LOOP                    // loop while the counter is >= 0

    // ---- Step 2: windowed multiply-accumulate ----
    //
    // Access pattern for one group (base pointers P = data, Q = table):
    //   data  : 4 halfwords at P, P+128, P+256, P+384, P+512 bytes
    //   table : 8 halfwords at Q, Q+256, Q+512, Q+768, Q+1024 bytes;
    //           LD2 de-interleaves them and only the first register
    //           (even-indexed elements) is kept - the second register
    //           is overwritten by the LD1 that follows it.
    //   Next group: P += 8, Q += 16 (kept in x9 / x10 while x0..x3 walk
    //   through the five taps).
    //
    // The code is software-pipelined: loads for the next accumulation
    // are interleaved with the multiply-accumulates of the current one,
    // so the relative order of each LD2 and the LD1 that reuses its
    // second register must be preserved.
SKIP_LOOP:

    mov             x4, x20                 // x4 = winAdd
    mov             x5, #8                  // number of groups per half
    ld1             {v0.4h}, [x0], #8
    mov             x6, #128                // byte distance between data taps

    ld2             {v1.4h, v2.4h}, [x2], #16

    mov             x9, x0                  // data A base for the next group
    add             x0, x0, #120            // 8 + 120 = 128: second tap

    mov             x11, x4
    ld1             {v2.4h}, [x0], x6
    add             x11, x11, #128          // x11 = &winAdd[32]

    mov             x10, x2                 // table A base for the next group
    add             x2, x2, #240            // 16 + 240 = 256: second tap

    smull           v30.4s, v0.4h, v1.4h
    ld2             {v3.4h, v4.4h}, [x2], #16
    add             x2, x2, #240

    ld1             {v4.4h}, [x0], x6
    smlal           v30.4s, v2.4h, v3.4h

    ld2             {v5.4h, v6.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v6.4h}, [x0], x6
    smlal           v30.4s, v4.4h, v5.4h

    ld2             {v7.4h, v8.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v8.4h}, [x0], x6
    smlal           v30.4s, v6.4h, v7.4h

    mov             x0, x9
    ld2             {v9.4h, v10.4h}, [x2], #16

    ld1             {v10.4h}, [x1], #8
    smlal           v30.4s, v8.4h, v9.4h    // v30 = first group, half A

    // Preload the five taps of data B / table B for the first group.
    mov             x9, x1                  // data B base for the next group
    ld2             {v11.4h, v12.4h}, [x3], #16
    add             x1, x1, #120

    mov             x2, x10
    ld1             {v12.4h}, [x1], x6
    mov             x10, x3                 // table B base for the next group

    add             x3, x3, #240
    ld2             {v13.4h, v14.4h}, [x3], #16
    add             x3, x3, #240

    ld2             {v15.4h, v16.4h}, [x3], #16

    ld1             {v14.4h}, [x1], x6
    add             x3, x3, #240

    ld1             {v16.4h}, [x1], x6
    sub             x5, x5, #1              // first group is already in flight

    ld2             {v17.4h, v18.4h}, [x3], #16

    add             x3, x3, #240
    ld1             {v18.4h}, [x1], x6

    mov             x1, x9
    ld2             {v19.4h, v20.4h}, [x3], #16

    mov             x3, x10

    // Each pass: store the pending half-A result, finish and store the
    // half-B result of the same group, then compute half A and preload
    // half B for the next group.
LOOP_1:

    ld1             {v0.4h}, [x0], #8

    mov             x9, x0
    ld2             {v1.4h, v2.4h}, [x2], #16
    add             x0, x0, #120

    mov             x10, x2
    st1             {v30.4s}, [x4], #16     // half-A result of the previous group
    add             x2, x2, #240

    smull           v30.4s, v10.4h, v11.4h
    ld1             {v2.4h}, [x0], x6
    smlal           v30.4s, v12.4h, v13.4h

    smlal           v30.4s, v14.4h, v15.4h
    ld2             {v3.4h, v4.4h}, [x2], #16
    smlal           v30.4s, v16.4h, v17.4h

    smlal           v30.4s, v18.4h, v19.4h
    ld1             {v4.4h}, [x0], x6
    add             x2, x2, #240

    st1             {v30.4s}, [x11], #16    // half-B result of the previous group

    smull           v30.4s, v0.4h, v1.4h
    ld2             {v5.4h, v6.4h}, [x2], #16
    smlal           v30.4s, v2.4h, v3.4h

    add             x2, x2, #240
    ld1             {v6.4h}, [x0], x6
    smlal           v30.4s, v4.4h, v5.4h

    ld2             {v7.4h, v8.4h}, [x2], #16

    add             x2, x2, #240
    ld1             {v8.4h}, [x0], x6
    smlal           v30.4s, v6.4h, v7.4h

    mov             x0, x9
    ld2             {v9.4h, v10.4h}, [x2], #16

    ld1             {v10.4h}, [x1], #8
    mov             x2, x10

    mov             x9, x1
    ld2             {v11.4h, v12.4h}, [x3], #16
    add             x1, x1, #120

    smlal           v30.4s, v8.4h, v9.4h
    ld1             {v12.4h}, [x1], x6
    mov             x10, x3

    add             x3, x3, #240
    ld2             {v13.4h, v14.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v14.4h}, [x1], x6
    ld2             {v15.4h, v16.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v16.4h}, [x1], x6
    ld2             {v17.4h, v18.4h}, [x3], #16
    add             x3, x3, #240

    ld1             {v18.4h}, [x1], x6
    subs            x5, x5, #1              // flags consumed by the bgt below

    mov             x1, x9
    ld2             {v19.4h, v20.4h}, [x3], #16

    mov             x3, x10

    bgt             LOOP_1

    // Drain the pipeline: results of the last group.
    st1             {v30.4s}, [x4], #16
    smull           v30.4s, v10.4h, v11.4h
    smlal           v30.4s, v12.4h, v13.4h

    smlal           v30.4s, v14.4h, v15.4h
    smlal           v30.4s, v16.4h, v17.4h
    smlal           v30.4s, v18.4h, v19.4h

    st1             {v30.4s}, [x11], #16

    ldp             x19, x20, [sp], #16
    pop_v_regs
    ret
342