@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@******************************************************************************
@*
@* @brief
@*  This file contains the definition of the combing artifact check (CAC)
@*  routine
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*  - ideint_cac_8x8_a9()
@*
@* @remarks
@*  Syntax: GNU as, ARM (AArch32) with NEON; '@' starts a comment.
@*
@*******************************************************************************


@******************************************************************************
@*
@* @brief Calculates Combing Artifact
@*
@* @par Description
@*  Runs the combing artifact check (CAC) on the two given fields. Four rows
@*  of eight bytes are read from each field. Two figures, "adj" and "alt",
@*  are accumulated separately for the left half (columns 0-3) and the right
@*  half (columns 4-7):
@*
@*    adj : differences between the top field and the bottom field
@*          (same-numbered rows, and per-column averages of each field);
@*          every individual difference below its threshold is dropped.
@*    alt : differences between even-numbered and odd-numbered rows
@*          (within each field, and of the top/bottom averaged rows).
@*
@*  alt is then biased: alt = alt + (alt >> SAD_BIAS_MULT_SHIFT)
@*                                + SAD_BIAS_ADDITIVE_HALF
@*
@* @param[in] pu1_top   (r0)
@*  UWORD8 pointer to top field
@*
@* @param[in] pu1_bot   (r1)
@*  UWORD8 pointer to bottom field
@*
@* @param[in] top_strd  (r2)
@*  Top field stride
@*
@* @param[in] bot_strd  (r3)
@*  Bottom field stride
@*
@* @returns
@*  r0 = 1 if the biased alt is less than adj for at least one of the two
@*       halves, 0 otherwise
@*
@* @remarks
@*  Leaf routine, no stack usage.
@*  Clobbers r0, r1, flags, d0-d7, d16-d21, d24-d31. All of these are
@*  caller-saved under the AAPCS, so nothing needs to be preserved here.
@*
@******************************************************************************

    @ Thresholds and bias terms (names follow the original comments)
    .equ        RSUM_CSUM_THRESH,       20
    .equ        RSUM_CSUM_THRESH_COL,   (RSUM_CSUM_THRESH >> 2)
    .equ        SAD_BIAS_MULT_SHIFT,    3
    .equ        SAD_BIAS_ADDITIVE_HALF, 4       @ SAD_BIAS_ADDITIVE >> 1

    .text
    .p2align    2
    .global     ideint_cac_8x8_a9

ideint_cac_8x8_a9:

    @--------------------------------------------------------------------
    @ Rows 0 and 1 of both fields
    @   q14 = {d28 = top row 0, d29 = bot row 0}
    @   q15 = {d30 = top row 1, d31 = bot row 1}
    @--------------------------------------------------------------------
    vld1.u8     d28,    [r0],   r2
    vld1.u8     d29,    [r1],   r3
    vld1.u8     d30,    [r0],   r2
    vld1.u8     d31,    [r1],   r3

    @ Two pairwise-add steps turn each 8-byte row into two 32-bit sums:
    @ one for columns 0-3 and one for columns 4-7.
    @   q0 = {top0.L, top0.R, bot0.L, bot0.R}
    @   q1 = {top1.L, top1.R, bot1.L, bot1.R}
    vpaddl.u8   q0,     q14
    vpaddl.u8   q1,     q15
    vpaddl.u16  q0,     q0
    vpaddl.u16  q1,     q1

    @ Each sum is at most 4 * 255 and fits in 16 bits, so the row-1 sums
    @ are packed into the upper halfword of the row-0 sums.
    @   d16 (u16) = {top0.L, top1.L, top0.R, top1.R}
    @   d17 (u16) = {bot0.L, bot1.L, bot0.R, bot1.R}
    vshl.u32    q8,     q1,     #16
    vorr.u32    q8,     q0,     q8

    @--------------------------------------------------------------------
    @ Rows 2 and 3 of both fields
    @   q12 = {d24 = top row 2, d25 = bot row 2}
    @   q13 = {d26 = top row 3, d27 = bot row 3}
    @--------------------------------------------------------------------
    vld1.u8     d24,    [r0],   r2
    vld1.u8     d25,    [r1],   r3
    vld1.u8     d26,    [r0],   r2
    vld1.u8     d27,    [r1],   r3

    @   q2 = {top2.L, top2.R, bot2.L, bot2.R}
    @   q3 = {top3.L, top3.R, bot3.L, bot3.R}
    vpaddl.u8   q2,     q12
    vpaddl.u8   q3,     q13
    vpaddl.u16  q2,     q2
    vpaddl.u16  q3,     q3

    @   d18 (u16) = {top2.L, top3.L, top2.R, top3.R}
    @   d19 (u16) = {bot2.L, bot3.L, bot2.R, bot3.R}
    vshl.u32    q9,     q3,     #16
    vorr.u32    q9,     q2,     q9

    @--------------------------------------------------------------------
    @ Row based adj: |top row sum - bottom row sum|, thresholded
    @--------------------------------------------------------------------
    vabd.u16    d16,    d16,    d17             @ rows 0 and 1
    vabd.u16    d17,    d18,    d19             @ rows 2 and 3

    @ Keep only differences >= RSUM_CSUM_THRESH (vcge yields an all-ones
    @ or all-zeros lane mask, which is then AND-ed with the value).
    vmov.u16    q9,     #RSUM_CSUM_THRESH
    vcge.u16    q10,    q8,     q9
    vand.u16    q10,    q8,     q10

    @ d20 (u16) = {L: rows 0+2, L: rows 1+3, R: rows 0+2, R: rows 1+3}
    vadd.u16    d20,    d20,    d21

    @--------------------------------------------------------------------
    @ Row based alt: |row 0 - row 1| + |row 2 - row 3|, per field
    @--------------------------------------------------------------------
    vabd.u32    q0,     q0,     q1
    vabd.u32    q2,     q2,     q3
    vadd.u32    q0,     q0,     q2
    vadd.u32    d21,    d0,     d1              @ top + bottom -> d21 = {L, R}

    @--------------------------------------------------------------------
    @ Column based adj: per-column rounded average of the four rows of
    @ each field, then |top average - bottom average|, thresholded
    @--------------------------------------------------------------------
    vrhadd.u8   q0,     q14,    q15             @ d0 = avg(top0, top1), d1 = avg(bot0, bot1)
    vrhadd.u8   q1,     q12,    q13             @ d2 = avg(top2, top3), d3 = avg(bot2, bot3)
    vrhadd.u8   q0,     q0,     q1              @ d0 = top average,     d1 = bottom average
    vabd.u8     d0,     d0,     d1

    @ Keep only differences >= RSUM_CSUM_THRESH >> 2.
    @ d4 is free here: q2 was last read by the row based alt sum above.
    vmov.u8     d4,     #RSUM_CSUM_THRESH_COL
    vcge.u8     d1,     d0,     d4
    vand.u8     d0,     d0,     d1

    @ Add neighbouring columns and scale by 4 before merging with the
    @ row based adj.
    vpaddl.u8   d0,     d0
    vshl.u16    d0,     d0,     #2
    vadd.u16    d20,    d0,     d20

    vpaddl.u16  d20,    d20                     @ d20 (u32) = {adj.L, adj.R}

    @--------------------------------------------------------------------
    @ Column based alt: top/bottom averaged even rows against
    @ top/bottom averaged odd rows
    @--------------------------------------------------------------------
    vrhadd.u8   d0,     d28,    d29             @ avg(top0, bot0)
    vrhadd.u8   d2,     d24,    d25             @ avg(top2, bot2)
    vrhadd.u8   d0,     d0,     d2              @ even-row average

    vrhadd.u8   d1,     d30,    d31             @ avg(top1, bot1)
    vrhadd.u8   d3,     d26,    d27             @ avg(top3, bot3)
    vrhadd.u8   d1,     d1,     d3              @ odd-row average

    vabd.u8     d0,     d0,     d1
    vpaddl.u8   d0,     d0
    vshl.u16    d0,     d0,     #2              @ same scale as the adj column term
    vpaddl.u16  d0,     d0
    vadd.u32    d21,    d0,     d21             @ d21 (u32) = {alt.L, alt.R}

    @--------------------------------------------------------------------
    @ Bias alt and compare against adj
    @--------------------------------------------------------------------
    vshr.u32    d0,     d21,    #SAD_BIAS_MULT_SHIFT
    vadd.u32    d21,    d21,    d0              @ alt += alt >> SAD_BIAS_MULT_SHIFT

    vmov.u32    d0,     #SAD_BIAS_ADDITIVE_HALF
    vadd.u32    d21,    d21,    d0              @ alt += SAD_BIAS_ADDITIVE >> 1

    @ Each lane becomes all-ones when alt < adj. The 64-bit sum of the
    @ two lanes has a non-zero low word iff at least one lane was set
    @ (0xFFFFFFFF for one lane, 0xFFFFFFFE for both).
    vclt.u32    d0,     d21,    d20
    vpaddl.u32  d0,     d0

    vmov.u32    r0,     d0[0]
    cmp         r0,     #0
    movne       r0,     #1                      @ normalise result to 0 / 1

    bx          lr