@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/

@/**
@******************************************************************************
@*
@* @brief  Evaluate the first three intra chroma modes (DC, HORZ and VERT),
@*         select the one with the smallest SAD and write its prediction.
@*
@* @par Description
@*  Target: ARM (32-bit) with NEON, GNU assembler syntax ('@' starts a
@*  comment).  The routine works on a block of 8 rows x 16 bytes.  It computes
@*  the sum of absolute differences between the source block and each of the
@*  three candidate predictions, replaces the SAD of every mode that is not
@*  enabled in u4_valid_intra_modes by (1 << 30), then stores the smallest SAD,
@*  the chosen mode number and the corresponding predicted block.
@*
@*  Tie breaking, as implemented by the compare chain below:
@*      DC   is chosen when  sad_dc <= sad_horz  and  sad_dc <= sad_vert
@*      HORZ is chosen when  sad_dc >  sad_horz  and  sad_horz <= sad_vert
@*      VERT is chosen otherwise
@*  If no mode is enabled, DC is selected and (1 << 30) is stored as the SAD.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source; 8 rows of 16 bytes are read, src_strd apart
@*
@* @param[in] pu1_ngbr_pels
@*  UWORD8 pointer to neighbouring pels; 34 bytes are read, always, regardless
@*  of u4_n_avblty:
@*      bytes  0..15  left column, one 2-byte pair per row, stored bottom-up
@*                    (bytes 14..15 are used for row 0, bytes 0..1 for row 7)
@*      bytes 16..17  skipped by this routine (presumably the top-left pair)
@*      bytes 18..33  top row, 16 bytes
@*  Even and odd bytes are averaged separately for DC (presumably interleaved
@*  Cb/Cr samples - the code only relies on the even/odd split).
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination; 8 rows of 16 bytes are written,
@*  dst_strd apart
@*
@* @param[in] src_strd
@*  integer source stride (bytes)
@*
@* @param[in] dst_strd
@*  integer destination stride (bytes)
@*
@* @param[in] u4_n_avblty
@*  availability of neighbouring pixels; only bits 0 and 2 are examined
@*  (value & 5): 5 -> left and top used for DC, 4 -> top only, 1 -> left only,
@*  anything else -> DC prediction is the constant 128
@*
@* @param[out] u4_intra_mode
@*  Pointer to the variable in which best mode is returned
@*  (0 = DC, 1 = HORZ, 2 = VERT)
@*
@* @param[out] pu4_sadmin
@*  Pointer to the variable in which minimum sad is returned
@*
@* @param[in] u4_valid_intra_modes
@*  Says what all modes are valid: bit 0 = DC, bit 1 = HORZ, bit 2 = VERT
@*
@*
@* @return      none
@*
@******************************************************************************
@*/
@
@void ih264e_evaluate_intra_chroma_modes_a9q(UWORD8 *pu1_src,
@                                            UWORD8 *pu1_ngbr_pels_i16,
@                                            UWORD8 *pu1_dst,
@                                            UWORD32 src_strd,
@                                            UWORD32 dst_strd,
@                                            WORD32 u4_n_avblty,
@                                            UWORD32 *u4_intra_mode,
@                                            WORD32 *pu4_sadmin,
@                                            UWORD32 u4_valid_intra_modes)
@
.text
.p2align 2

    .global ih264e_evaluate_intra_chroma_modes_a9q
    @ Mark the exported symbol as a function so that ELF tools see it as code.
    .type   ih264e_evaluate_intra_chroma_modes_a9q, %function

ih264e_evaluate_intra_chroma_modes_a9q:

@ Register usage
@   On entry (first four arguments arrive in registers):
@     r0 = pu1_src, r1 = pu1_ngbr_pels_i16, r2 = pu1_dst, r3 = src_strd
@   Loaded from the caller's stack inside the function:
@     r4 = dst_strd, r5 = u4_n_avblty, r6 = u4_intra_mode, r7 = pu4_sadmin,
@     r0 = u4_valid_intra_modes (r0 is reused once the source has been read)
@   SAD accumulators (u16 lanes): q8/q9 = VERT, q13/q7 = HORZ, q11/q12 = DC
@   DC prediction: q14 = rows 0-3, q15 = rows 4-7
@   Stack argument offsets: 40 bytes after the integer push (10 registers),
@   104 bytes after the additional 64-byte vpush of d8-d15.

    stmfd         sp!, {r4-r12, r14}    @store register values to stack
    @-----------------------
    ldr           r5, [sp, #44]         @r5 = u4_n_avblty (2nd stack argument)
    @-------------------------
    mov           r12, r1               @keep the neighbour base for the SAD stage
    vpush         {d8-d15}              @d8-d15 are used below and restored on exit
    vld1.32       {q4}, [r1]!           @q4 = 16 bytes of left column
    add           r1, r1, #2            @skip the 2 bytes between left and top
    vld1.32       {q5}, [r1]!           @q5 = 16 bytes of top row

    @ Split even/odd bytes: d8 = left even, d9 = top even,
    @                       d10 = left odd, d11 = top odd
    vuzp.u8       q4, q5                @

    @ Reduce every 8-byte group to two sums of 4 samples each
    vpaddl.u8     d8, d8
    vpadd.u16     d8, d8

    vpaddl.u8     d9, d9
    vpadd.u16     d9, d9

    vpaddl.u8     d10, d10
    vpadd.u16     d10, d10

    vpaddl.u8     d11, d11

    and           r7, r5, #5            @keep only the left (bit 0) and top (bit 2) flags
    vpadd.u16     d11, d11
    subs          r8, r7, #5
    beq           all_available
    subs          r8, r7, #4
    beq           top_available
    subs          r8, r7, #1
    beq           left_available
    mov           r10, #128             @no usable neighbour: DC prediction is 128
    vdup.8        q14, r10
    vdup.8        q15, r10
    b             sad

all_available:
    @ After the zip q4 holds the left sums and q5 the top sums, even/odd
    @ interleaved.  Corner quadrants average 8 samples (>> 3 with rounding),
    @ the other two quadrants average 4 samples (>> 2 with rounding).
    vzip.u16      q4, q5
    vext.16       q6, q4, q4, #2
    vadd.u16      q7, q5, q6
    vqrshrn.u16   d14, q7, #3
    vqrshrn.u16   d15, q4, #2
    vqrshrn.u16   d16, q5, #2
    vdup.16       d28, d14[0]           @rows 0-3, bytes 0-7  : top + left
    vdup.16       d29, d16[1]           @rows 0-3, bytes 8-15 : top only
    vdup.16       d30, d15[0]           @rows 4-7, bytes 0-7  : left only
    vdup.16       d31, d14[1]           @rows 4-7, bytes 8-15 : top + left
    b             sad
top_available:
    @ Only the top sums (q5 after the zip) are used; both row groups share them
    vzip.u16      q4, q5
    vqrshrn.u16   d16, q5, #2
    vdup.16       d28, d16[0]
    vdup.16       d29, d16[1]
    vdup.16       d30, d16[0]
    vdup.16       d31, d16[1]
    b             sad
left_available:
    @ Only the left sums (q4 after the zip) are used; both halves of a row
    @ share the same value
    vzip.u16      q4, q5
    vqrshrn.u16   d16, q4, #2
    vdup.16       d28, d16[3]
    vdup.16       d29, d16[3]
    vdup.16       d30, d16[2]
    vdup.16       d31, d16[2]


sad:
    @ Reload the raw neighbours (q4/q5 were consumed by the DC computation)
    @ and set up two row streams: r0/r8 walk rows 0-3, r12/r10 walk rows 4-7.
    vld1.32       {q4}, [r12]!          @q4 = left column (used for HORZ output)
    sub           r8, r12, #2           @r8  -> left pair of row 0 (ngbr + 14)
    add           r12, r12, #2
    vld1.32       {q5}, [r12]!          @q5 = top row (VERT prediction)
    add           r12, r0, r3, lsl #2   @r12 -> source row 4
    sub           r10, r8, #8           @r10 -> left pair of row 4 (ngbr + 6)
    vld1.32       {q0}, [r0], r3
    ldrh          r9, [r8]
    vdup.16       q10, r9               @ row 0

    @/vertical row 0;
    vabdl.u8      q8, d0, d10
    vabdl.u8      q9, d1, d11
    sub           r8, r8, #2
    vld1.32       {q1}, [r12], r3

    @/HORZ row 0;
    vabdl.u8      q13, d0, d20
    vabdl.u8      q7, d1, d21
    ldrh          r9, [r10]
    @/dc row 0;
    vabdl.u8      q11, d0, d28
    vabdl.u8      q12, d1, d29


    vdup.16       q10, r9               @ row 4
    @/vertical row 4;
    vabal.u8      q8, d2, d10
    vabal.u8      q9, d3, d11
    sub           r10, r10, #2

    @/HORZ row 4;
    vabal.u8      q13, d2, d20
    vabal.u8      q7, d3, d21
    @/dc row 4;
    vabal.u8      q11, d2, d30
    vabal.u8      q12, d3, d31

    mov           r11, #3               @three more iterations: rows 1-3 and 5-7

loop:
    vld1.32       {q0}, [r0], r3
    ldrh          r9, [r8]


    @/vertical row i;
    vabal.u8      q8, d0, d10
    vabal.u8      q9, d1, d11

    vdup.16       q10, r9               @ row i
    vld1.32       {q1}, [r12], r3
    sub           r8, r8, #2
    @/HORZ row i;
    vabal.u8      q13, d0, d20
    vabal.u8      q7, d1, d21
    ldrh          r9, [r10]
    @/dc row i;
    vabal.u8      q11, d0, d28
    vabal.u8      q12, d1, d29
    sub           r10, r10, #2

    vdup.16       q10, r9               @ row i+4
    @/vertical row i+4;
    vabal.u8      q8, d2, d10
    vabal.u8      q9, d3, d11
    subs          r11, r11, #1

    @/HORZ row i+4;
    vabal.u8      q13, d2, d20
    vabal.u8      q7, d3, d21
    @/dc row i+4;
    vabal.u8      q11, d2, d30
    vabal.u8      q12, d3, d31
    bne           loop



@-------------------------------------------
@ Horizontal reduction of the three sets of u16 partial sums to one scalar each

    vadd.i16      q9, q9, q8            @/VERT
    vadd.i16      q7, q13, q7           @/HORZ
    vadd.i16      q12, q11, q12         @/DC
    vadd.i16      d18, d19, d18         @/VERT
    vadd.i16      d14, d15, d14         @/HORZ
    vadd.i16      d24, d24, d25         @/DC
    vpaddl.u16    d18, d18              @/VERT
    vpaddl.u16    d14, d14              @/HORZ
    vpaddl.u16    d24, d24              @/DC
    vpaddl.u32    d18, d18              @/VERT
    vpaddl.u32    d14, d14              @/HORZ
    vpaddl.u32    d24, d24              @/DC



    vmov.u32      r8, d18[0]            @ vert
    vmov.u32      r9, d14[0]            @horz
    vmov.u32      r10, d24[0]           @dc

    mov           r11, #1
@-----------------------
    ldr           r0, [sp, #120]        @ u4_valid_intra_modes
@--------------------------------------------


    lsl           r11 , #30             @r11 = 1 << 30, SAD given to disabled modes

    ands          r7, r0, #04           @ vert mode valid?
    moveq         r8, r11

    ands          r6, r0, #02           @ horz mode valid?
    moveq         r9, r11

    ands          r6, r0, #01           @ dc mode valid?
    moveq         r10, r11


    @---------------------------
    ldr           r4, [sp, #104]        @r4 = dst_strd,
    ldr           r6, [sp, #112]        @r6 = u4_intra_mode (output pointer)
    ldr           r7, [sp, #116]        @r7 = pu4_sadmin

    @--------------------------

    cmp           r10, r9               @signed compares; every SAD is <= 1 << 30
    bgt           not_dc
    cmp           r10, r8
    bgt           do_vert

    @/----------------------
    @DO DC PREDICTION
    str           r10 , [r7]            @MIN SAD
    mov           r10, #0
    str           r10 , [r6]            @ MODE = 0 (DC)
    b             do_dc_vert
    @-----------------------------

not_dc:
    cmp           r9, r8
    bgt           do_vert
    @/----------------------
    @DO HORIZONTAL
    @ Each output row is its left pair replicated across 16 bytes.  The left
    @ column is stored bottom-up, so the lanes of q4 are consumed from
    @ d9[3] (row 0) down to d8[0] (row 7).  q14/q15 are free to reuse here.

    vdup.16       q10, d9[3]            @/HORIZONTAL VALUE ROW=0;
    str           r9 , [r7]             @MIN SAD
    mov           r9, #1
    vdup.16       q11, d9[2]            @/HORIZONTAL VALUE ROW=1;
    str           r9 , [r6]             @ MODE = 1 (HORZ)
    vdup.16       q12, d9[1]            @/HORIZONTAL VALUE ROW=2;
    vst1.32       {d20, d21} , [r2], r4 @0
    vdup.16       q13, d9[0]            @/HORIZONTAL VALUE ROW=3;
    vst1.32       {d22, d23} , [r2], r4 @1
    vdup.16       q14, d8[3]            @/HORIZONTAL VALUE ROW=4;
    vst1.32       {d24, d25} , [r2], r4 @2
    vdup.16       q15, d8[2]            @/HORIZONTAL VALUE ROW=5;
    vst1.32       {d26, d27} , [r2], r4 @3
    vdup.16       q1, d8[1]             @/HORIZONTAL VALUE ROW=6;
    vst1.32       {d28, d29} , [r2], r4 @4
    vdup.16       q2, d8[0]             @/HORIZONTAL VALUE ROW=7;
    vst1.32       {d30, d31} , [r2], r4 @5
    vst1.32       {d2, d3} , [r2], r4   @6
    vst1.32       {d4, d5} , [r2], r4   @7
    b             end_func

do_vert:
    @DO VERTICAL PREDICTION
    str           r8 , [r7]             @MIN SAD
    mov           r8, #2
    str           r8 , [r6]             @ MODE = 2 (VERT)
    vmov          q15, q5               @every row is a copy of the top row, so the
    vmov          q14, q5               @shared DC/VERT store sequence below can be used

do_dc_vert:
    @ Shared by DC and VERT: rows 0-3 come from q14, rows 4-7 from q15
    vst1.32       {d28, d29} , [r2], r4 @0
    vst1.32       {d28, d29} , [r2], r4 @1
    vst1.32       {d28, d29} , [r2], r4 @2
    vst1.32       {d28, d29} , [r2], r4 @3
    vst1.32       {d30, d31} , [r2], r4 @4
    vst1.32       {d30, d31} , [r2], r4 @5
    vst1.32       {d30, d31} , [r2], r4 @6
    vst1.32       {d30, d31} , [r2], r4 @7


end_func:
    vpop          {d8-d15}
    ldmfd         sp!, {r4-r12, pc}     @Restoring registers from stack

    .size   ih264e_evaluate_intra_chroma_modes_a9q, . - ih264e_evaluate_intra_chroma_modes_a9q