1///******************************************************************************
2// *
3// * Copyright (C) 2018 The Android Open Source Project
4// *
5// * Licensed under the Apache License, Version 2.0 (the "License");
6// * you may not use this file except in compliance with the License.
7// * You may obtain a copy of the License at:
8// *
9// * http://www.apache.org/licenses/LICENSE-2.0
10// *
11// * Unless required by applicable law or agreed to in writing, software
12// * distributed under the License is distributed on an "AS IS" BASIS,
13// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14// * See the License for the specific language governing permissions and
15// * limitations under the License.
16// *
17// *****************************************************************************
18// * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19//*/
20
21
// push_v_regs
// Spills q8-q15 (full 128 bits each), x8-x17 and the x29/x30 pair into a
// 224-byte frame carved out below the incoming sp. 224 is a multiple of 16,
// so sp keeps whatever 16-byte alignment it had on entry.
// Despite the name, general-purpose registers are saved as well.
//
// Frame layout, as byte offsets from the new sp:
//   +192 q8,  q9      +160 q10, q11     +128 q12, q13     +96 q14, q15
//   +80  x8,  x9      +64  x10, x11     +48  x12, x13     +32 x14, x15
//   +16  x16, x17     +0   x29, x30
// pop_v_regs reads this exact layout back.
.macro push_v_regs
    sub             sp, sp, #224            // reserve the whole frame first so every store lands at or above sp
    stp             q8, q9, [sp, #192]
    stp             q10, q11, [sp, #160]
    stp             q12, q13, [sp, #128]
    stp             q14, q15, [sp, #96]
    stp             x8, x9, [sp, #80]
    stp             x10, x11, [sp, #64]
    stp             x12, x13, [sp, #48]
    stp             x14, x15, [sp, #32]
    stp             x16, x17, [sp, #16]
    stp             x29, x30, [sp]
.endm
// pop_v_regs
// Reloads x29/x30, x8-x17 and q8-q15 from the 224-byte frame that
// push_v_regs leaves at sp, then releases the frame.
//
// Expected layout, as byte offsets from sp on entry to the macro:
//   +0   x29, x30     +16  x16, x17     +32  x14, x15     +48 x12, x13
//   +64  x10, x11     +80  x8,  x9      +96  q14, q15     +128 q12, q13
//   +160 q10, q11     +192 q8,  q9
.macro pop_v_regs
    ldp             x29, x30, [sp]
    ldp             x16, x17, [sp, #16]
    ldp             x14, x15, [sp, #32]
    ldp             x12, x13, [sp, #48]
    ldp             x10, x11, [sp, #64]
    ldp             x8, x9, [sp, #80]
    ldp             q14, q15, [sp, #96]
    ldp             q12, q13, [sp, #128]
    ldp             q10, q11, [sp, #160]
    ldp             q8, q9, [sp, #192]
    add             sp, sp, #224            // release the frame only after every slot has been read
.endm
.text
.global ixheaacd_neg_shift_spec_armv8

// ixheaacd_neg_shift_spec_armv8
//
// Reads 448 32-bit words starting at x0, walking from the last word down to
// the first, and writes one 16-bit result per word through x1.
// Per element: saturating negate -> SQSHL by the per-lane count taken from
// w2 (saturating) -> saturating add of 0x8000 -> arithmetic shift right by
// 16 -> bits [15:0] stored.
//
// In:
//   x0 = source base; words 0..447 are read
//   x1 = destination; output k (k = 0..447) is derived from source word
//        447 - k and stored at byte offset k * (x3 << 1)
//   w2 = shift count, broadcast to every lane for SQSHL
//   x3 = distance between consecutive outputs, in halfwords
// Clobbers: x1, x4-x7, v0, v6, v20-v25, v27, v29-v31, NZCV
//           (x8 is written too, but pop_v_regs restores it)
//
// The loop is software pipelined with two interleaved lanes of work:
//   stage A: v0 -> v25 -> v24 -> v23 -> v27
//   stage B: v6 -> v29 -> v22 -> v21 -> v20 -> v27
// The stores of one stage overlap the arithmetic of the other.
//
// NOTE(review): between push_v_regs and pop_v_regs the only saved register
// that is written is x8, which is caller-saved under AAPCS64, and no call is
// made - the save/restore pair looks unnecessary. Left in place so the set of
// registers callers see preserved does not change; confirm before removing.
// NOTE(review): x3 is used at full 64-bit width. If the C prototype passes a
// narrower integer here, the upper bits are not guaranteed to be extended -
// verify against the caller.
ixheaacd_neg_shift_spec_armv8:
    push_v_regs

    // ---- setup ----
    mov             x5, #440                // loop budget: 448 words minus the 8 completed outside the loop
    add             x6, x0, #1776           // 1776 = 444 * 4: x6 -> source words 444..447
    mov             x8, #-16                // load post-index step: back up one 4-word vector
    lsl             x7, x3, #1              // output step in bytes
    dup             v31.4s, w2              // shift count in every lane
    mov             w4, #0x8000
    dup             v30.4s, w4              // rounding term added ahead of the >> 16

    // ---- pipeline fill: first group fully processed, second group negated ----
    ld1             {v0.4s}, [x6], x8
    sqneg           v0.4s, v0.4s

    ld1             {v6.4s}, [x6], x8
    sqshl           v25.4s, v0.4s, v31.4s
    sqadd           v24.4s, v25.4s, v30.4s
    sshr            v23.4s, v24.4s, #16
    rev64           v23.4s, v23.4s          // swap the two words inside each 64-bit half

    uzp1            v27.8h, v23.8h, v23.8h  // gather bits [15:0] of each word into h0..h3
    sqneg           v29.4s, v6.4s

.Lneg_shift_spec_loop:

    // Store stage-A results while stage B runs its arithmetic.
    // After rev64, lane order 2,3,0,1 emits the group from its highest
    // source address to its lowest.
    st1             {v27.h}[2], [x1], x7
    sqshl           v22.4s, v29.4s, v31.4s
    ld1             {v0.4s}, [x6], x8
    st1             {v27.h}[3], [x1], x7
    sqadd           v21.4s, v22.4s, v30.4s
    st1             {v27.h}[0], [x1], x7
    sqneg           v0.4s, v0.4s
    st1             {v27.h}[1], [x1], x7
    sshr            v20.4s, v21.4s, #16
    rev64           v20.4s, v20.4s
    subs            x5, x5, #8              // flags are consumed by b.gt below; nothing in between writes NZCV

    // Store stage-B results while stage A runs its arithmetic.
    uzp1            v27.8h, v20.8h, v20.8h
    sqshl           v25.4s, v0.4s, v31.4s
    st1             {v27.h}[2], [x1], x7
    ld1             {v6.4s}, [x6], x8
    sqadd           v24.4s, v25.4s, v30.4s
    st1             {v27.h}[3], [x1], x7
    sshr            v23.4s, v24.4s, #16
    st1             {v27.h}[0], [x1], x7
    rev64           v23.4s, v23.4s
    st1             {v27.h}[1], [x1], x7

    uzp1            v27.8h, v23.8h, v23.8h
    sqneg           v29.4s, v6.4s

    b.gt            .Lneg_shift_spec_loop

    // ---- pipeline drain: last stage-A group, then last stage-B group ----
    st1             {v27.h}[2], [x1], x7
    sqshl           v22.4s, v29.4s, v31.4s
    st1             {v27.h}[3], [x1], x7
    st1             {v27.h}[0], [x1], x7
    sqadd           v21.4s, v22.4s, v30.4s
    st1             {v27.h}[1], [x1], x7
    sshr            v20.4s, v21.4s, #16

    rev64           v20.4s, v20.4s

    uzp1            v27.8h, v20.8h, v20.8h

    st1             {v27.h}[2], [x1], x7
    st1             {v27.h}[3], [x1], x7
    st1             {v27.h}[0], [x1], x7
    st1             {v27.h}[1], [x1], x7

    pop_v_regs
    ret
124