1@/******************************************************************************
2@ *
3@ * Copyright (C) 2015 The Android Open Source Project
4@ *
5@ * Licensed under the Apache License, Version 2.0 (the "License");
6@ * you may not use this file except in compliance with the License.
7@ * You may obtain a copy of the License at:
8@ *
9@ * http://www.apache.org/licenses/LICENSE-2.0
10@ *
11@ * Unless required by applicable law or agreed to in writing, software
12@ * distributed under the License is distributed on an "AS IS" BASIS,
13@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14@ * See the License for the specific language governing permissions and
15@ * limitations under the License.
16@ *
17@ *****************************************************************************
18@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19@*/
20
21@******************************************************************************
22@*
23@* @brief
24@*  This file contains definitions of routines for combing artifact check
25@*
26@* @author
27@*  Ittiam
28@*
29@* @par List of Functions:
30@*  - ideint_cac_8x8_a9()
31@*
32@* @remarks
33@*  None
34@*
35@*******************************************************************************
36
37
@******************************************************************************
@*
@*  @brief Calculates Combing Artifact
@*
@*  @par   Description
@*   This function calculates the combing artifact check (CAC) for an 8x8
@*   block given as two fields: four rows of 8 pixels are read from the top
@*   field and four rows of 8 pixels from the bottom field. The block is
@*   handled as two sub-blocks, columns 0-3 and columns 4-7. For each
@*   sub-block two measures are accumulated:
@*     adj - differences between the top field and the bottom field
@*     alt - differences between even and odd rows of the same field
@*   alt is then biased as alt + (alt >> 3) + 4 and compared with adj.
@*
@* @param[in] pu1_top
@*  UWORD8 pointer to top field (r0)
@*
@* @param[in] pu1_bot
@*  UWORD8 pointer to bottom field (r1)
@*
@* @param[in] top_strd
@*  Top field stride (r2)
@*
@* @param[in] bot_strd
@*  Bottom field stride (r3)
@*
@* @returns
@*  r0 = 1 if the biased alt is less than adj for at least one of the two
@*  sub-blocks, 0 otherwise
@*
@* @remarks
@*  Leaf routine, does not use the stack.
@*  Modifies r0, r1, the condition flags, d0-d7, d16-d21 and d24-d31 only;
@*  none of these need to be preserved for the caller (AAPCS).
@*
@******************************************************************************

    .global ideint_cac_8x8_a9

ideint_cac_8x8_a9:

    @ No prologue: only scratch core/NEON registers are used below, so
    @ nothing has to be saved on the stack.

    @ Rows 0 and 1 of both fields
    @ q14 = {d28: top row 0, d29: bottom row 0}
    @ q15 = {d30: top row 1, d31: bottom row 1}
    vld1.u8     d28,    [r0],   r2
    vld1.u8     d29,    [r1],   r3
    vld1.u8     d30,    [r0],   r2
    vld1.u8     d31,    [r1],   r3

    @ ---------------- Row based adj and alt values ----------------

    @ Row sums over 4 pixels each (left half / right half of every row)
    vpaddl.u8   q0,     q14
    vpaddl.u8   q1,     q15
    vpaddl.u16  q0,     q0
    vpaddl.u16  q1,     q1
    @ q0 = 32 bit sums {top0 L, top0 R, bot0 L, bot0 R}
    @ q1 = 32 bit sums {top1 L, top1 R, bot1 L, bot1 R}

    @ Every sum fits in 16 bits (4 * 255), so interleave the q1 sums into
    @ the upper halfwords of the q0 sums
    vshl.u32    q8,     q1,     #16
    vorr.u32    q8,     q0,     q8
    @ d16 = 16 bit {top0 L, top1 L, top0 R, top1 R}
    @ d17 = 16 bit {bot0 L, bot1 L, bot0 R, bot1 R}

    @ Rows 2 and 3 of both fields
    @ q12 = {d24: top row 2, d25: bottom row 2}
    @ q13 = {d26: top row 3, d27: bottom row 3}
    vld1.u8     d24,    [r0],   r2
    vld1.u8     d25,    [r1],   r3
    vld1.u8     d26,    [r0],   r2
    vld1.u8     d27,    [r1],   r3

    @ Same row sums and packing for rows 2 and 3
    vpaddl.u8   q2,     q12
    vpaddl.u8   q3,     q13
    vpaddl.u16  q2,     q2
    vpaddl.u16  q3,     q3
    vshl.u32    q9,     q3,     #16
    vorr.u32    q9,     q2,     q9
    @ d18 = 16 bit {top2 L, top3 L, top2 R, top3 R}
    @ d19 = 16 bit {bot2 L, bot3 L, bot2 R, bot3 R}

    @ Absolute difference between top and bottom row sums
    vabd.u16    d16,    d16,    d17
    vabd.u16    d17,    d18,    d19

    @ RSUM_CSUM_THRESH
    vmov.u16    q9,     #20

    @ Zero out differences smaller than RSUM_CSUM_THRESH
    vcge.u16    q10,    q8,     q9
    vand.u16    q10,    q8,     q10

    @ Row based adj: lanes 0-1 belong to the left sub-block,
    @ lanes 2-3 to the right sub-block
    vadd.u16    d20,    d20,    d21

    @ Row based alt: |row i - row i+1| inside each field, summed over both
    @ fields and both row pairs
    vabd.u32    q0,     q0,     q1
    vabd.u32    q2,     q2,     q3
    vadd.u32    q0,     q0,     q2
    vadd.u32    d21,    d0,     d1
    @ d21 = 32 bit {alt left, alt right}

    @ --------------- Column based adj and alt values ---------------

    @ Per-column rounded average of the four top rows (d0) and of the four
    @ bottom rows (d1)
    vrhadd.u8   q0,     q14,    q15
    vrhadd.u8   q1,     q12,    q13
    vrhadd.u8   q0,     q0,     q1

    vabd.u8     d0,     d0,     d1

    @ RSUM_CSUM_THRESH >> 2
    @ d2 is free here (q1 was last read by the averaging above), so the
    @ threshold lives in a scratch register instead of callee-saved d9
    vmov.u8     d2,     #5

    @ Zero out differences smaller than RSUM_CSUM_THRESH >> 2
    vcge.u8     d1,     d0,     d2
    vand.u8     d0,     d0,     d1

    @ Pair up columns and scale by 4 since averages of 4 rows were compared
    vpaddl.u8   d0,     d0
    vshl.u16    d0,     d0,     #2

    @ Add row based adj and reduce to one value per sub-block
    vadd.u16    d20,    d0,     d20
    vpaddl.u16  d20,    d20
    @ d20 = 32 bit {adj left, adj right}

    @ Per-column rounded average of rows 0 and 2 of both fields (d0) and
    @ of rows 1 and 3 of both fields (d1)
    vrhadd.u8   d0,     d28,    d29
    vrhadd.u8   d2,     d24,    d25
    vrhadd.u8   d0,     d0,     d2

    vrhadd.u8   d1,     d30,    d31
    vrhadd.u8   d3,     d26,    d27
    vrhadd.u8   d1,     d1,     d3

    vabd.u8     d0,     d0,     d1
    vpaddl.u8   d0,     d0

    vshl.u16    d0,     d0,     #2
    vpaddl.u16  d0,     d0
    vadd.u32    d21,    d0,     d21
    @ d21 = 32 bit {alt left, alt right}

    @ ------------------------- Decision -------------------------

    @ alt += alt >> SAD_BIAS_MULT_SHIFT
    vshr.u32    d0,     d21,    #3
    vadd.u32    d21,    d21,    d0

    @ alt += SAD_BIAS_ADDITIVE >> 1
    vmov.u32    d0,     #4
    vadd.u32    d21,    d21,    d0

    @ Lane mask is all ones where alt < adj; the low word of the pairwise
    @ sum is non-zero if either lane is set
    vclt.u32    d0,     d21,    d20
    vpaddl.u32  d0,     d0

    @ Normalise the result to 0 / 1
    vmov.u32    r0,     d0[0]
    cmp         r0,     #0
    movne       r0,     #1

    bx          lr
216