@/******************************************************************************
@ *
@ * Copyright (C) 2015 The Android Open Source Project
@ *
@ * Licensed under the Apache License, Version 2.0 (the "License");
@ * you may not use this file except in compliance with the License.
@ * You may obtain a copy of the License at:
@ *
@ * http://www.apache.org/licenses/LICENSE-2.0
@ *
@ * Unless required by applicable law or agreed to in writing, software
@ * distributed under the License is distributed on an "AS IS" BASIS,
@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ * See the License for the specific language governing permissions and
@ * limitations under the License.
@ *
@ *****************************************************************************
@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
@*/
@**
@******************************************************************************
@* @file
@*  ih264_intra_pred_luma_16x16_a9q.s
@*
@* @brief
@*  Contains function definitions for intra 16x16 luma prediction.
@*
@* @author
@*  Ittiam
@*
@* @par List of Functions:
@*
@*  - ih264_intra_pred_luma_16x16_mode_vert_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_horz_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_dc_a9q()
@*  - ih264_intra_pred_luma_16x16_mode_plane_a9q()
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*

@* All the functions here are replicated from ih264_intra_pred_filters.c
@

@**
@**
@**
@

.text
.p2align 2


    .extern ih264_gai1_intrapred_luma_plane_coeffs
.hidden ih264_gai1_intrapred_luma_plane_coeffs

@ PC-relative offset of the plane coefficient table, used by the plane
@ predictor in this file:
@       ldr r7, scratch_intrapred_addr1
@   scrlbl1:
@       add r7, r7, pc
@ The "- 8" compensates for pc reading as (address of scrlbl1 + 8) when the
@ add executes in ARM state, so r7 ends up holding the table address without
@ needing an absolute relocation.
scratch_intrapred_addr1:
    .long ih264_gai1_intrapred_luma_plane_coeffs - scrlbl1 - 8
@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_vert
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:vertical
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:Vertical, described in sec 8.3.3.1.
@*  The 16 bytes at pu1_src + 17 are read once and written to each of the
@*  16 destination rows.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_vert(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0    => *pu1_src
@   r1    => *pu1_dst
@   r2    => src_strd
@   r3    => dst_strd
@   stack => ui_neighboravailability (5th argument; never loaded by this function)


    .global ih264_intra_pred_luma_16x16_mode_vert_a9q

ih264_intra_pred_luma_16x16_mode_vert_a9q:

    @ Leaf routine: only r0, r1, r3 and q0 are touched and all of them are
    @ caller-saved, so nothing is pushed and the return is a plain bx lr.
    add         r0, r0, #17             @ r0 = pu1_src + 17 (the 16 bytes used as the top row)
    vld1.8      {q0}, [r0]              @ q0 = those 16 bytes

    .rept       16                      @ one store per output row
    vst1.8      {q0}, [r1], r3          @ write the row, then pu1_dst += dst_strd
    .endr

    bx          lr





@******************************************************************************


@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_horz
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:horizontal
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:horizontal, described in sec 8.3.3.2.
@*  Output row n (n = 0..15) is filled with the single byte pu1_src[15 - n].
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not used in this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@*
@void ih264_intra_pred_luma_16x16_mode_horz(UWORD8 *pu1_src,
@                                         UWORD8 *pu1_dst,
@                                         WORD32 src_strd,
@                                         WORD32 dst_strd,
@                                         WORD32 ui_neighboravailability)
@**************Variables Vs Registers*****************************************
@   r0    => *pu1_src
@   r1    => *pu1_dst
@   r2    => src_strd (value ignored; the register is reused as the loop counter)
@   r3    => dst_strd
@   stack => ui_neighboravailability (5th argument; never loaded)

    .global ih264_intra_pred_luma_16x16_mode_horz_a9q

ih264_intra_pred_luma_16x16_mode_horz_a9q:

    @ Leaf routine: no core register beyond r0-r3 is used, so lr is not saved.
    vld1.u8     {q0}, [r0]              @ q0 = pu1_src[0..15]
    mov         r2, #14                 @ rows produced inside the loop (2 per pass)

    vdup.u8     q1, d1[7]               @ q1 = pu1_src[15] in every lane -> row 0
    vdup.u8     q2, d1[6]               @ q2 = pu1_src[14] in every lane -> row 1
    vst1.8      {q1}, [r1], r3          @ store row 0

loop_16x16_horz:
    vext.8      q0, q0, q0, #14         @ rotate by 14 bytes: the next two source bytes
                                        @ (two indices lower) move into d1[7] / d1[6]
    vst1.8      {q2}, [r1], r3          @ store the odd row prepared earlier
    vdup.u8     q1, d1[7]
    subs        r2, r2, #2
    vdup.u8     q2, d1[6]
    vst1.8      {q1}, [r1], r3          @ store the even row
    bne         loop_16x16_horz

    @ q2 already holds pu1_src[0]; no further rotate of q0 is needed because
    @ q0 is not read again.
    vst1.8      {q2}, [r1], r3          @ store row 15

    bx          lr




@******************************************************************************


@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_dc
@*
@* @brief
@*  Perform Intra prediction for luma_16x16 mode:DC
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:DC, described in sec 8.3.3.3.
@*  Every output byte is set to one value chosen from ui_neighboravailability:
@*    bit 0 (0x01) and bit 2 (0x04) set : (sum(pu1_src[0..15]) + sum(pu1_src[17..32]) + 16) >> 5
@*    only bit 2 (top) set              : (sum(pu1_src[17..32]) + 8) >> 4
@*    only bit 0 (left) set             : (sum(pu1_src[0..15]) + 8) >> 4
@*    neither set                       : 128
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_dc(UWORD8 *pu1_src,
@                                       UWORD8 *pu1_dst,
@                                       WORD32 src_strd,
@                                       WORD32 dst_strd,
@                                       WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0   => *pu1_src
@   r1   => *pu1_dst
@   r2   => src_strd (not read)
@   r3   => dst_strd
@   [sp] => ui_neighboravailability (5th argument), loaded into r12

    .global ih264_intra_pred_luma_16x16_mode_dc_a9q

ih264_intra_pred_luma_16x16_mode_dc_a9q:

    @ Leaf routine: the availability word is held in r12 (caller-saved
    @ scratch), so no register has to be pushed and the argument sits at [sp].
    ldr         r12, [sp]               @ r12 = ui_neighboravailability

    tst         r12, #0x01              @ left available? if not, try top only
    beq         top_available
    tst         r12, #0x04              @ top available? if not, left only
    beq         left_available

    @ BOTH LEFT AND TOP AVAILABLE
    vld1.u8     {q0}, [r0]              @ q0 = pu1_src[0..15]
    add         r0, r0, #17
    vpaddl.u8   q0, q0                  @ 8 x u16 pair sums
    vld1.u8     {q1}, [r0]              @ q1 = pu1_src[17..32]
    vpaddl.u8   q1, q1
    vadd.u16    q0, q0, q1
    vadd.u16    d0, d0, d1              @ 4 x u16
    vpaddl.u16  d0, d0                  @ 2 x u32
    vpaddl.u32  d0, d0                  @ total of all 32 bytes in the low lane
    vqrshrun.s16 d0, q0, #5             @ d0[0] = (total + 16) >> 5
    vdup.u8     q0, d0[0]
    b           str_pred

top_available:                          @ left is not available
    tst         r12, #0x04              @ top available? if not, nothing is available
    beq         none_available

    add         r0, r0, #17             @ ONLY TOP AVAILABLE: average pu1_src[17..32]
    @ fall through: the 16-byte average is shared with the left-only case

left_available:                         @ ONLY LEFT AVAILABLE enters here with r0 = pu1_src
    vld1.u8     {q0}, [r0]              @ the 16 bytes to average
    vpaddl.u8   q0, q0
    vadd.u16    d0, d0, d1
    vpaddl.u16  d0, d0
    vpaddl.u32  d0, d0                  @ total of the 16 bytes in the low lane
    vqrshrun.s16 d0, q0, #4             @ d0[0] = (total + 8) >> 4
    vdup.u8     q0, d0[0]
    b           str_pred

none_available:                         @ NONE AVAILABLE
    vmov.u8     q0, #128

str_pred:
    .rept       16                      @ one store per output row
    vst1.8      {q0}, [r1], r3          @ write the row, then pu1_dst += dst_strd
    .endr

    bx          lr





@******************************************************************************


@**
@*******************************************************************************
@*
@*ih264_intra_pred_luma_16x16_mode_plane
@*
@* @brief
@*
@*  Perform Intra prediction for luma_16x16 mode:PLANE
@*
@* @par Description:
@*  Perform Intra prediction for luma_16x16 mode:PLANE, described in sec 8.3.3.4.
@*  Two weighted gradients are formed, one from pu1_src[16..32] (scaled into
@*  i_b) and one from pu1_src[0..16] (scaled into i_c), plus an offset
@*  i_a = 16 * (pu1_src[0] + pu1_src[32]). Each output row is then produced
@*  from the previous one by adding i_c, with a rounding shift by 5 and
@*  saturation to 8 bits.
@*
@* @param[in] pu1_src
@*  UWORD8 pointer to the source
@*
@* @param[out] pu1_dst
@*  UWORD8 pointer to the destination
@*
@* @param[in] src_strd
@*  integer source stride (not read by this function)
@*
@* @param[in] dst_strd
@*  integer destination stride
@*
@* @param[in] ui_neighboravailability
@*  availability of neighbouring pixels (not read by this function)
@*
@* @returns
@*  None
@*
@* @remarks
@*  None
@*
@*******************************************************************************
@void ih264_intra_pred_luma_16x16_mode_plane(UWORD8 *pu1_src,
@                                        UWORD8 *pu1_dst,
@                                        WORD32 src_strd,
@                                        WORD32 dst_strd,
@                                        WORD32 ui_neighboravailability)

@**************Variables Vs Registers*****************************************
@   r0    => *pu1_src
@   r1    => *pu1_dst (copied to r2 on entry; r2 is the store pointer)
@   r2    => src_strd (overwritten without being read)
@   r3    => dst_strd
@   stack => ui_neighboravailability (5th argument; never loaded)

    .global ih264_intra_pred_luma_16x16_mode_plane_a9q
ih264_intra_pred_luma_16x16_mode_plane_a9q:

    @ NOTE(review): r12 is caller-saved under AAPCS, so saving it here looks
    @ unnecessary - confirm before removing.
    stmfd         sp!, {r4-r10, r12, lr}

    mov           r2, r1                @ r2 = pu1_dst
    add           r1, r0, #17
    add           r0, r0, #15           @ r0 = pu1_src + 15

    mov           r8, #9                @ post-increment applied by the first vld1 below
    sub           r1, r1, #1            @ r1 = pu1_src + 16
    mov           r10, r1               @top_left
    mov           r4, #-1               @ r4 = -1: step for the descending byte loads
    vld1.32       d2, [r1], r8          @ d2 = pu1_src[16..23]; r1 = pu1_src + 25
    ldr           r7, scratch_intrapred_addr1
scrlbl1:
    add           r7, r7, pc            @ r7 = address of ih264_gai1_intrapred_luma_plane_coeffs

    vld1.32       d0, [r1]              @ d0 = pu1_src[25..32]
    vrev64.8      d2, d2                @ d2[i] = pu1_src[23 - i]
    vld1.32       {q3}, [r7]            @ q3 = 16 coefficient bytes (presumably 1..16 - table is defined elsewhere)
    vsubl.u8      q0, d0, d2            @ q0[i] = pu1_src[25 + i] - pu1_src[23 - i], 16 bit
    vmovl.u8      q8, d6                @ q8 = coefficients 0..7 widened to 16 bit
    vmul.s16      q0, q0, q8            @ weight each difference
    vmovl.u8      q9, d7                @ q9 = coefficients 8..15 widened to 16 bit

    add           r7, r0, r4, lsl #3    @ r7 = pu1_src + 7  (walks down towards pu1_src[0])
    sub           r0, r7, r4, lsl #1    @ r0 = pu1_src + 9  (walks up towards pu1_src[15])
    rsb           lr, r4, #0x0          @ lr = +1: step for the ascending byte loads

    vpadd.s16     d0, d0, d1            @ start reducing the 8 weighted differences

    ldrb          r8, [r7], r4          @ pu1_src[7]
    ldrb          r9, [r0], lr          @ pu1_src[9]

    vpaddl.s16    d0, d0
    sub           r12, r8, r9           @ r12 = 1 * (pu1_src[7] - pu1_src[9])

    ldrb          r8, [r7], r4          @ pu1_src[6]

    vpaddl.s32    d0, d0                @ low lane = sum of the weighted differences
    ldrb          r9, [r0], lr          @ pu1_src[10]
    sub           r8, r8, r9
    vshl.s32      d2, d0, #2            @ 4 * sum
    add           r12, r12, r8, lsl #1  @ += 2 * (pu1_src[6] - pu1_src[10])

    vadd.s32      d0, d0, d2            @ 5 * sum
    ldrb          r8, [r7], r4          @ pu1_src[5]
    ldrb          r9, [r0], lr          @ pu1_src[11]
    vrshr.s32     d0, d0, #6            @ i_b = D0[0]  = (5 * sum + 32) >> 6
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ pu1_src[4]
    add           r8, r8, r8, lsl #1    @ * 3

    vdup.16       q2, d0[0]             @ q2 = i_b in every 16-bit lane
    add           r12, r12, r8          @ += 3 * (pu1_src[5] - pu1_src[11])
    ldrb          r9, [r0], lr          @ pu1_src[12]
    vmul.s16      q0, q2, q8            @ q0[i] = i_b * coefficient[i]
    sub           r5, r5, r9
    vmul.s16      q1, q2, q9            @ q1[i] = i_b * coefficient[8 + i]
    add           r12, r12, r5, lsl #2  @ += 4 * (pu1_src[4] - pu1_src[12])

    ldrb          r8, [r7], r4          @ pu1_src[3]
    ldrb          r9, [r0], lr          @ pu1_src[13]
    sub           r8, r8, r9
    ldrb          r5, [r7], r4          @ pu1_src[2]
    add           r8, r8, r8, lsl #2    @ * 5
    ldrb          r6, [r0], lr          @ pu1_src[14]
    add           r12, r12, r8          @ += 5 * (pu1_src[3] - pu1_src[13])
    ldrb          r8, [r7], r4          @ pu1_src[1]
    ldrb          r9, [r0], lr          @ pu1_src[15]

    sub           r5, r5, r6            @ pu1_src[2] - pu1_src[14]
    sub           r8, r8, r9            @ pu1_src[1] - pu1_src[15]
    add           r5, r5, r5, lsl #1    @ * 3 (doubled to * 6 when accumulated)
    rsb           r8, r8, r8, lsl #3    @ * 7
    add           r12, r12, r5, lsl #1  @ += 6 * (pu1_src[2] - pu1_src[14])
    ldrb          r5, [r7], r4          @ pu1_src[0]
    ldrb          r6, [r10]             @top_left  (pu1_src[16])
    add           r12, r12, r8          @ += 7 * (pu1_src[1] - pu1_src[15])
    sub           r9, r5, r6
    ldrb          r6, [r1, #7]          @ pu1_src[32]
    add           r12, r12, r9, lsl #3  @ i_c = r12  (+= 8 * (pu1_src[0] - pu1_src[16]); not yet scaled)
    add           r8, r5, r6            @ pu1_src[0] + pu1_src[32]

    add           r12, r12, r12, lsl #2 @ * 5
    lsl           r8, r8, #4            @ i_a = r8

    add           r12, r12, #0x20       @ + 32 for rounding
    lsr           r12, r12, #6          @ logical shift is sufficient: only the low 16 bits are
                                        @ used (vdup.16 below) and they match an arithmetic shift

    vshl.s16      q14, q2, #3           @ 8 * i_b
    vdup.16       q3, r12               @ q3 = i_c in every 16-bit lane

    vdup.16       q15, r8               @ q15 = i_a
    vshl.s16      q13, q3, #3           @ 8 * i_c
    vsub.s16      q15, q15, q14
    vsub.s16      q15, q15, q13         @ i_a - 8 * i_b - 8 * i_c
    vadd.s16      q14, q15, q3          @ i_a - 8 * i_b - 7 * i_c  (row 0 base value)

    mov           r0, #14               @ rows produced inside the loop (2 per pass)
    vadd.s16      q13, q14, q0          @ row 0, columns 0..7
    vadd.s16      q14, q14, q1          @ row 0, columns 8..15
    vqrshrun.s16  d20, q13, #5          @ rounding shift by 5, saturate to u8
    vqrshrun.s16  d21, q14, #5

loop_16x16_plane:
    @ q10 and q11 alternate: one row is stored while the next is narrowed.

    vadd.s16      q13, q13, q3          @ next row = previous row + i_c
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3       @ store the row finished earlier; r2 += dst_strd
    vqrshrun.s16  d23, q14, #5

    vadd.s16      q13, q13, q3
    subs          r0, #2
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d20, q13, #5
    vst1.32       {q11}, [r2], r3
    vqrshrun.s16  d21, q14, #5
    bne           loop_16x16_plane

    @ Rows 14 and 15: q10 is already prepared, q11 is computed here.
    vadd.s16      q13, q13, q3
    vadd.s16      q14, q14, q3
    vqrshrun.s16  d22, q13, #5
    vst1.32       {q10}, [r2], r3
    vqrshrun.s16  d23, q14, #5
    vst1.32       {q11}, [r2], r3

    ldmfd         sp!, {r4-r10, r12, pc}


