1//****************************************************************************** 2//* 3//* Copyright (C) 2015 The Android Open Source Project 4//* 5//* Licensed under the Apache License, Version 2.0 (the "License"); 6//* you may not use this file except in compliance with the License. 7//* You may obtain a copy of the License at: 8//* 9//* http://www.apache.org/licenses/LICENSE-2.0 10//* 11//* Unless required by applicable law or agreed to in writing, software 12//* distributed under the License is distributed on an "AS IS" BASIS, 13//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14//* See the License for the specific language governing permissions and 15//* limitations under the License. 16//* 17//***************************************************************************** 18//* Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19//*/ 20///** 21//****************************************************************************** 22//* @file 23//* ih264_intra_pred_luma_8x8_av8.s 24//* 25//* @brief 26//* Contains function definitions for intra 8x8 Luma prediction . 
27//* 28//* @author 29//* Ittiam 30//* 31//* @par List of Functions: 32//* 33//* -ih264_intra_pred_luma_8x8_mode_vert_av8 34//* -ih264_intra_pred_luma_8x8_mode_horz_av8 35//* -ih264_intra_pred_luma_8x8_mode_dc_av8 36//* -ih264_intra_pred_luma_8x8_mode_diag_dl_av8 37//* -ih264_intra_pred_luma_8x8_mode_diag_dr_av8 38//* -ih264_intra_pred_luma_8x8_mode_vert_r_av8 39//* -ih264_intra_pred_luma_8x8_mode_horz_d_av8 40//* -ih264_intra_pred_luma_8x8_mode_vert_l_av8 41//* -ih264_intra_pred_luma_8x8_mode_horz_u_av8 42//* 43//* @remarks 44//* None 45//* 46//******************************************************************************* 47//*/ 48 49///* All the functions here are replicated from ih264_intra_pred_filters.c 50// 51 52///** 53///** 54///** 55 56.text 57.p2align 2 58.include "ih264_neon_macros.s" 59 60.extern ih264_gai1_intrapred_luma_8x8_horz_u 61 62 63 64///** 65//******************************************************************************* 66//* 67//*ih264_intra_pred_luma_8x8_mode_vert 68//* 69//* @brief 70//* Perform Intra prediction for luma_8x8 mode:vertical 71//* 72//* @par Description: 73//* Perform Intra prediction for luma_8x8 mode:vertical ,described in sec 8.3.2.2.2 74//* 75//* @param[in] pu1_src 76//* UWORD8 pointer to the source 77//* 78//* @param[out] pu1_dst 79//* UWORD8 pointer to the destination 80//* 81//* @param[in] src_strd 82//* integer source stride 83//* 84//* @param[in] dst_strd 85//* integer destination stride 86//* 87//* @param[in] ui_neighboravailability 88//* availability of neighbouring pixels(Not used in this function) 89//* 90//* @returns 91//* 92//* @remarks 93//* None 94//* 95//******************************************************************************* 96//void ih264_intra_pred_luma_8x8_mode_vert(UWORD8 *pu1_src, 97// UWORD8 *pu1_dst, 98// WORD32 src_strd, 99// WORD32 dst_strd, 100// WORD32 ui_neighboravailability) 101 102//**************Variables Vs Registers***************************************** 103// x0 => *pu1_src 
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// The eight bytes at pu1_src[9..16] are loaded once and written unchanged to
// eight consecutive destination rows.
// Registers modified: x0, x1, x3, v0. All of them are caller-saved under
// AAPCS64, so this leaf routine does not touch the stack.


    .global ih264_intra_pred_luma_8x8_mode_vert_av8

ih264_intra_pred_luma_8x8_mode_vert_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing
    add       x0, x0, #9                // x0 = &pu1_src[9]
    ld1       {v0.8b}, [x0]             // v0 = pu1_src[9..16]

    .rept     8                         // rows 0..7 all receive the same eight bytes
    st1       {v0.8b}, [x1], x3
    .endr

    ret





///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:horizontal
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:horizontal ,described in sec 8.3.2.2.2
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels(Not used in this function)
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************
//*/
//void ih264_intra_pred_luma_8x8_mode_horz(UWORD8 *pu1_src,
//                                         UWORD8 *pu1_dst,
//                                         WORD32 src_strd,
//                                         WORD32 dst_strd,
//                                         WORD32 ui_neighboravailability)
//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// Destination row r (r = 0..7) is filled with eight copies of pu1_src[7 - r]:
// the source pointer starts at pu1_src[7] and walks backwards one byte per row.
// Registers modified: x0, x1, x3, x9, v0. All are caller-saved under AAPCS64,
// so this leaf routine does not touch the stack.


    .global ih264_intra_pred_luma_8x8_mode_horz_av8

ih264_intra_pred_luma_8x8_mode_horz_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing
    add       x0, x0, #7                // x0 = &pu1_src[7], the sample used for row 0
    mov       x9, #-1                   // source step: one byte backwards per row

    .rept     8
    ld1r      {v0.8b}, [x0], x9         // broadcast *x0 to all eight lanes, then x0 -= 1
    st1       {v0.8b}, [x1], x3         // write the row, then x1 += dst_strd
    .endr

    ret







///******************************************************************************


///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_dc
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:DC
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:DC ,described in sec 8.3.2.2.3
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void
//     ih264_intra_pred_luma_8x8_mode_dc(UWORD8 *pu1_src,
//                                       UWORD8 *pu1_dst,
//                                       WORD32 src_strd,
//                                       WORD32 dst_strd,
//                                       WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (bit 0: left available, bit 2: top available)
//
// Every destination byte of the 8x8 block receives one DC value:
//      left and top available : (sum(pu1_src[0..7]) + sum(pu1_src[9..16]) + 8) >> 4
//      only top available     : (sum(pu1_src[9..16]) + 4) >> 3
//      only left available    : (sum(pu1_src[0..7])  + 4) >> 3
//      neither available      : 128
// The left samples are read only when bit 0 is set and the top samples only
// when bit 2 is set.
// Registers modified: x1, x3, x10, v0, v1, v31. All are caller-saved under
// AAPCS64, so this leaf routine does not touch the stack.


    .global ih264_intra_pred_luma_8x8_mode_dc_av8

ih264_intra_pred_luma_8x8_mode_dc_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing
    tbz       w4, #0, .Ldc_left_missing // bit 0 clear: left column not available

    ld1       {v0.8b}, [x0]             // left neighbours pu1_src[0..7]
    uaddlv    h0, v0.8b                 // h0 = sum of the eight left samples (at most 8 * 255)
    tbz       w4, #2, .Ldc_left_only    // bit 2 clear: top row not available

    // Left and top both available.
    add       x10, x0, #9
    ld1       {v1.8b}, [x10]            // top neighbours pu1_src[9..16]
    uaddlv    h1, v1.8b                 // h1 = sum of the eight top samples
    add       v0.4h, v0.4h, v1.4h       // lane 0 = sum_left + sum_top (at most 4080, fits in 16 bits)
    rshrn     v31.8b, v0.8h, #4         // lane 0 = (sum + 8) >> 4, at most 255
    b         .Ldc_broadcast

.Ldc_left_only:
    rshrn     v31.8b, v0.8h, #3         // lane 0 = (sum_left + 4) >> 3
    b         .Ldc_broadcast

.Ldc_left_missing:
    tbz       w4, #2, .Ldc_none         // neither left nor top available
    add       x10, x0, #9
    ld1       {v1.8b}, [x10]            // top neighbours pu1_src[9..16]
    uaddlv    h1, v1.8b
    rshrn     v31.8b, v1.8h, #3         // lane 0 = (sum_top + 4) >> 3

.Ldc_broadcast:
    dup       v31.8b, v31.b[0]          // replicate the DC value across the row
    b         .Ldc_store

.Ldc_none:
    movi      v31.8b, #128              // no neighbours: fixed value 128

.Ldc_store:
    .rept     8                         // rows 0..7
    st1       {v31.8b}, [x1], x3
    .endr

    ret






///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_diag_dl
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Left ,described in sec 8.3.2.2.4
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_diag_dl(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32
//                                            ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// With t[i] = pu1_src[9 + i] for i = 0..15 and t[16] taken as t[15], the
// routine computes f[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2 for i = 0..14
// and writes destination row r (r = 0..7) as f[r .. r+7].
// Sixteen bytes are loaded in one go; pu1_src[24] is the highest index read.
// Registers modified: x0, x1, x3, x6, v0-v5, v16, v20, v22, v24, v26. All are
// caller-saved under AAPCS64, so this leaf routine does not touch the stack.

    .global ih264_intra_pred_luma_8x8_mode_diag_dl_av8

ih264_intra_pred_luma_8x8_mode_diag_dl_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing

    add       x0, x0, #9                // x0 = &pu1_src[9]
    add       x6, x0, #15               // x6 = &pu1_src[24], the last sample loaded
    ld1       {v0.16b}, [x0]            // v0 = t[0..15]
    mov       v1.d[0], v0.d[1]          // v1 low half = t[8..15]
    ext       v4.16b, v0.16b, v0.16b, #2 // v4 = t[2..15], then t[0], t[1] wrapped around
    mov       v5.d[0], v4.d[1]          // v5 low half = t[10..15], t[0], t[1]
    ext       v2.16b, v0.16b, v0.16b, #1 // v2 = t[1..15], then t[0] wrapped around
    mov       v3.d[0], v2.d[1]          // v3 low half = t[9..15], t[0]
    ld1       {v5.b}[6], [x6]           // replace the wrapped t[0] in lane 6 by t[15]

    uaddl     v20.8h, v0.8b, v2.8b      // t[i]   + t[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      // t[i]   + t[i+1], i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // t[i+1] + t[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      // t[i+1] + t[i+2], i = 8..15
    add       v24.8h, v20.8h, v24.8h    // t[i] + 2*t[i+1] + t[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v24.8h, #2         // f[0..7]
    sqrshrun  v5.8b, v26.8h, #2         // f[8..15]; f[15] uses wrapped samples and is never stored
    mov       v4.d[1], v5.d[0]          // v4 = f[0..15]

    st1       {v4.8b}, [x1], x3         // row 0 = f[0..7]
    .irp      shift, 1, 2, 3, 4, 5, 6, 7
    ext       v16.16b, v4.16b, v4.16b, #\shift // low half = f[shift .. shift+7]
    st1       {v16.8b}, [x1], x3        // row <shift>
    .endr

    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_diag_dr
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right
//*
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Diagonal_Down_Right ,described in sec 8.3.2.2.5
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_diag_dr(UWORD8 *pu1_src,
//                                            UWORD8 *pu1_dst,
//                                            WORD32 src_strd,
//                                            WORD32 dst_strd,
//                                            WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// With s[i] = pu1_src[i], the routine computes
//      f[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2   for i = 0..14
// and writes destination row r (r = 0..7) as f[7-r .. 14-r].
// pu1_src[0..16] is read.
// Registers modified: x0, x1, x3, v0-v5, v16, v20, v22, v24, v26. All are
// caller-saved under AAPCS64, so this leaf routine does not touch the stack.


    .global ih264_intra_pred_luma_8x8_mode_diag_dr_av8

ih264_intra_pred_luma_8x8_mode_diag_dr_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing

    ld1       {v0.16b}, [x0]            // v0 = s[0..15]
    mov       v1.d[0], v0.d[1]          // v1 low half = s[8..15]
    add       x0, x0, #1
    ld1       {v2.16b}, [x0]            // v2 = s[1..16]
    mov       v3.d[0], v2.d[1]          // v3 low half = s[9..16]
    ext       v4.16b, v2.16b, v2.16b, #1 // v4 = s[2..16], then s[1] wrapped around
    mov       v5.d[0], v4.d[1]          // v5 low half = s[10..16], s[1]

    uaddl     v20.8h, v0.8b, v2.8b      // s[i]   + s[i+1], i = 0..7
    uaddl     v22.8h, v1.8b, v3.8b      // s[i]   + s[i+1], i = 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // s[i+1] + s[i+2], i = 0..7
    uaddl     v26.8h, v3.8b, v5.8b      // s[i+1] + s[i+2], i = 8..15
    add       v24.8h, v20.8h, v24.8h    // s[i] + 2*s[i+1] + s[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v24.8h, #2         // f[0..7]
    sqrshrun  v5.8b, v26.8h, #2         // f[8..15]; f[15] uses the wrapped sample and is never stored
    mov       v4.d[1], v5.d[0]          // v4 = f[0..15]

    .irp      shift, 7, 6, 5, 4, 3, 2, 1
    ext       v16.16b, v4.16b, v4.16b, #\shift // low half = f[shift .. shift+7]
    st1       {v16.8b}, [x1], x3        // rows 0..6 use shifts 7..1
    .endr
    st1       {v4.8b}, [x1], x3         // row 7 = f[0..7]

    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_vert_r
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Right
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Right ,described in sec 8.3.2.2.6
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_vert_r(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// With s[i] = pu1_src[i], two filtered vectors are built from pu1_src[0..16]:
//      FILT11 [i] = (s[i] + s[i+1] + 1) >> 1             -> v4
//      FILT121[i] = (s[i] + 2*s[i+1] + s[i+2] + 2) >> 2  -> v6
// Each destination row is then assembled from lanes of these vectors and of
// shuffled copies of them. Rows 3..6 are written in pieces; the last piece of
// each row post-increments x1 by (dst_strd - bytes already written), so x1
// advances by exactly dst_strd per row.
// NOTE(review): x19/x20 are pushed and popped but not referenced in the body.
// NOTE(review): push_v_regs/pop_v_regs come from ih264_neon_macros.s;
// presumably they preserve the callee-saved SIMD registers (v12 and v14 are
// written here) - confirm against the macro file.


    .global ih264_intra_pred_luma_8x8_mode_vert_r_av8

ih264_intra_pred_luma_8x8_mode_vert_r_av8:

    // STMFD sp!, {x4-x12, x14}          //store register values to stack
    push_v_regs
    stp       x19, x20, [sp, #-16]!
    sxtw      x3, w3                    // widen the 32-bit dst_strd for post-index addressing

    ld1       { v0.16b}, [x0]           // v0 = s[0..15]
    mov       v1.d[0], v0.d[1]          // v1 low half = s[8..15]
    add       x0, x0, #1
    ld1       { v2.16b}, [x0]           // v2 = s[1..16]
    mov       v3.d[0], v2.d[1]          // v3 low half = s[9..16]
    ext       v4.16b, v2.16b , v2.16b , #1 // v4 = s[2..16], then s[1] wrapped around
    mov       v5.d[0], v4.d[1]
    // v2 = v0 shifted left by one sample
    // v4 = v2 shifted left by one sample
    uaddl     v20.8h, v0.8b, v2.8b      // s[i] + s[i+1], low half
    uaddl     v22.8h, v1.8b, v3.8b      // s[i] + s[i+1], high half
    uaddl     v24.8h, v2.8b, v4.8b      // s[i+1] + s[i+2], low half
    uaddl     v26.8h, v3.8b, v5.8b      // s[i+1] + s[i+2], high half
    add       v24.8h, v20.8h , v24.8h   // s[i] + 2*s[i+1] + s[i+2]
    add       v26.8h, v22.8h , v26.8h

    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1
    mov       v4.d[1], v5.d[0]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2
    mov       v6.d[1], v7.d[0]
    //v4 (Q2 in the original comment) has all FILT11 values
    //v6 (Q3 in the original comment) has all FILT121 values
    sub       x5, x3, #6                // final step for rows written as 2 + 4 + 2 bytes
    sub       x6, x3, #4                // final step for the row written as 2 + 1 + 1 + 4 bytes
    st1       {v5.8b}, [x1], x3         // row 0
    ext       v18.16b, v6.16b , v6.16b , #15
    mov       v22.16b , v18.16b         // keep a copy: v18 is overwritten by uzp2 below
    ext       v16.16b, v4.16b , v4.16b , #1
    st1       {v18.d}[1], [x1], x3      //row 1
    mov       v14.16b , v16.16b         // keep a copy: v16 is overwritten below
    ext       v20.16b, v4.16b , v4.16b , #15
    uzp1      v17.16b, v16.16b, v18.16b // even-numbered bytes of v16 followed by those of v18
    uzp2      v18.16b, v16.16b, v18.16b // odd-numbered bytes of v16 followed by those of v18
    mov       v16.16b , v17.16b
    //row 2
    ext       v12.16b, v16.16b , v16.16b , #1
    st1       {v20.d}[1], [x1]          // write eight bytes without advancing x1 ...
    st1       {v6.b}[6], [x1], x3       // ... then overwrite the first byte and step to the next row
    //row 3

    st1       {v12.h}[5], [x1], #2
    st1       {v6.s}[2], [x1], #4
    st1       {v6.h}[6], [x1], x5
    //row 4
    st1       {v18.h}[5], [x1], #2
    st1       {v4.s}[2], [x1], #4
    st1       {v4.h}[6], [x1], x5
    //row 5
    ext       v26.16b, v18.16b , v18.16b , #1
    st1       {v16.h}[5], [x1], #2
    st1       {v22.s}[2], [x1], #4
    st1       {v22.h}[6], [x1], x5
    //row 6
    st1       {v26.h}[4], [x1], #2
    st1       {v26.b}[10], [x1], #1
    st1       {v4.b}[8], [x1], #1
    st1       {v14.s}[2], [x1], x6
    //row 7
    st1       {v12.s}[2], [x1], #4
    st1       {v6.s}[2], [x1], #4

end_func_vert_r:
    // LDMFD sp!,{x4-x12,PC}             //Restoring registers from stack
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret




///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz_d
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Down
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Down ,described in sec 8.3.2.2.7
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_horz_d(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// The same FILT11 (v4) and FILT121 (v6) vectors as in the vertical-right
// routine are built from pu1_src[0..16]. They are then interleaved at byte
// granularity (trn1/trn2 .16b) and at halfword granularity (trn1/trn2 .8h),
// and every row is written as 2- or 4-byte pieces of the interleaved vectors.
// The last piece of each row post-increments x1 by (dst_strd - bytes already
// written), so x1 advances by exactly dst_strd per row.
// NOTE(review): x19/x20 are pushed and popped but not referenced in the body.
// NOTE(review): push_v_regs/pop_v_regs come from ih264_neon_macros.s;
// presumably they preserve the callee-saved SIMD registers (v8-v14 are
// written here) - confirm against the macro file.

    .global ih264_intra_pred_luma_8x8_mode_horz_d_av8

ih264_intra_pred_luma_8x8_mode_horz_d_av8:

    // STMFD sp!, {x4-x12, x14}          //store register values to stack
    push_v_regs
    stp       x19, x20, [sp, #-16]!
    sxtw      x3, w3                    // widen the 32-bit dst_strd for post-index addressing

    ld1       { v0.16b}, [x0]           // v0 = pu1_src[0..15]
    mov       v1.d[0], v0.d[1]
    add       x0, x0, #1
    ld1       { v2.16b}, [x0]           // v2 = pu1_src[1..16]
    mov       v3.d[0], v2.d[1]
    ext       v4.16b, v2.16b , v2.16b , #1 // v4 = pu1_src[2..16], then pu1_src[1] wrapped around
    mov       v5.d[0], v4.d[1]
    // v2 = v0 shifted left by one sample
    // v4 = v2 shifted left by one sample
    uaddl     v20.8h, v0.8b, v2.8b
    uaddl     v22.8h, v1.8b, v3.8b
    uaddl     v24.8h, v2.8b, v4.8b
    uaddl     v26.8h, v3.8b, v5.8b
    add       v24.8h, v20.8h , v24.8h
    add       v26.8h, v22.8h , v26.8h

    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1
    mov       v4.d[1], v5.d[0]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2
    mov       v6.d[1], v7.d[0]
    //v4 (Q2 in the original comment) has all FILT11 values
    //v6 (Q3 in the original comment) has all FILT121 values
    mov       v8.16b, v4.16b
    mov       v10.16b, v6.16b
    sub       x6, x3, #6                // final step for rows written as 2 + 4 + 2 (or 2+2+2+2) bytes
    trn1      v9.16b, v8.16b, v10.16b   // even-numbered bytes of v4 and v6, interleaved
    trn2      v10.16b, v8.16b, v10.16b  // odd-numbered bytes of v4 and v6, interleaved
    mov       v8.16b, v9.16b
    mov       v12.16b, v8.16b
    mov       v14.16b, v10.16b
    sub       x5, x3, #4                // final step for rows written as 4 + 4 bytes
    trn1      v13.8h, v12.8h, v14.8h
    trn2      v14.8h, v12.8h, v14.8h
    mov       v12.16b, v13.16b
    ext       v16.16b, v6.16b , v6.16b , #14
    //ROW 0
    st1       {v16.d}[1], [x1]          // write eight bytes without advancing x1 ...
    st1       {v10.h}[3], [x1], x3      // ... then overwrite the first two and step to the next row

    //ROW 1
    st1       {v14.s}[1], [x1], #4
    st1       {v6.s}[2], [x1], x5
    //ROW 2
    st1       {v10.h}[2], [x1], #2
    st1       {v14.s}[1], [x1], #4
    st1       {v7.h}[0], [x1], x6
    //ROW 3
    st1       {v12.s}[1], [x1], #4
    st1       {v14.s}[1], [x1], x5
    //ROW 4
    st1       {v14.h}[1], [x1], #2
    st1       {v12.s}[1], [x1], #4
    st1       {v14.h}[2], [x1], x6
    //ROW 5
    st1       {v14.s}[0], [x1], #4
    st1       {v12.s}[1], [x1], x5
    //ROW 6
    st1       {v10.h}[0], [x1], #2
    st1       {v8.h}[1], [x1], #2
    st1       {v14.h}[1], [x1], #2
    st1       {v12.h}[2], [x1], x6
    //ROW 7
    st1       {v12.s}[0], [x1], #4
    st1       {v14.s}[0], [x1], x5

end_func_horz_d:
    // LDMFD sp!,{x4-x12,PC}             //Restoring registers from stack
    ldp       x19, x20, [sp], #16
    pop_v_regs
    ret





///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_vert_l
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Left
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Vertical_Left ,described in sec 8.3.2.2.8
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_vert_l(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// With t[i] = pu1_src[9 + i]:
//      FILT11 [i] = (t[i] + t[i+1] + 1) >> 1             -> v4
//      FILT121[i] = (t[i] + 2*t[i+1] + t[i+2] + 2) >> 2  -> v6
// Row 2k   (k = 0..3) is FILT11 [k .. k+7],
// row 2k+1 (k = 0..3) is FILT121[k .. k+7].
// Only lanes 0..10 of either vector are ever stored, so the shifted inputs
// are derived from the single 16-byte load with EXT; lanes that wrap around
// only affect results that are never written. This keeps the highest index
// read at pu1_src[24] (a second 16-byte load from pu1_src + 10 would also
// touch pu1_src[25]).
// Registers modified: x0, x1, x3, v0, v2, v4, v6, v16, v17, v20, v22, v24,
// v26. All are caller-saved under AAPCS64, so this leaf routine does not
// touch the stack.


    .global ih264_intra_pred_luma_8x8_mode_vert_l_av8

ih264_intra_pred_luma_8x8_mode_vert_l_av8:

    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing
    add       x0, x0, #9                // x0 = &pu1_src[9]
    ld1       {v0.16b}, [x0]            // v0 = t[0..15]
    ext       v2.16b, v0.16b, v0.16b, #1 // lanes 0..14 = t[1..15], lane 15 wraps
    ext       v4.16b, v0.16b, v0.16b, #2 // lanes 0..13 = t[2..15], lanes 14..15 wrap

    uaddl     v20.8h, v0.8b, v2.8b      // t[i]   + t[i+1], lanes 0..7
    uaddl2    v22.8h, v0.16b, v2.16b    // t[i]   + t[i+1], lanes 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // t[i+1] + t[i+2], lanes 0..7
    uaddl2    v26.8h, v2.16b, v4.16b    // t[i+1] + t[i+2], lanes 8..15
    add       v24.8h, v20.8h, v24.8h    // t[i] + 2*t[i+1] + t[i+2]
    add       v26.8h, v22.8h, v26.8h

    sqrshrun  v4.8b, v20.8h, #1         // FILT11, lanes 0..7
    sqrshrun2 v4.16b, v22.8h, #1        // FILT11, lanes 8..15
    sqrshrun  v6.8b, v24.8h, #2         // FILT121, lanes 0..7
    sqrshrun2 v6.16b, v26.8h, #2        // FILT121, lanes 8..15

    //ROW 0,1
    st1       {v4.8b}, [x1], x3
    st1       {v6.8b}, [x1], x3

    //ROW 2,3 / 4,5 / 6,7: the same two vectors shifted by 1, 2 and 3 lanes
    .irp      shift, 1, 2, 3
    ext       v16.16b, v4.16b, v4.16b, #\shift
    ext       v17.16b, v6.16b, v6.16b, #\shift
    st1       {v16.8b}, [x1], x3
    st1       {v17.8b}, [x1], x3
    .endr

    ret





///**
//*******************************************************************************
//*
//*ih264_intra_pred_luma_8x8_mode_horz_u
//*
//* @brief
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Up
//*
//* @par Description:
//*  Perform Intra prediction for luma_8x8 mode:Horizontal_Up ,described in sec 8.3.2.2.9
//*
//* @param[in] pu1_src
//*  UWORD8 pointer to the source
//*
//* @param[out] pu1_dst
//*  UWORD8 pointer to the destination
//*
//* @param[in] src_strd
//*  integer source stride
//*
//* @param[in] dst_strd
//*  integer destination stride
//*
//* @param[in] ui_neighboravailability
//*
//*  availability of neighbouring pixels
//*
//* @returns
//*
//* @remarks
//*  None
//*
//*******************************************************************************/
//void ih264_intra_pred_luma_8x8_mode_horz_u(UWORD8 *pu1_src,
//                                           UWORD8 *pu1_dst,
//                                           WORD32 src_strd,
//                                           WORD32 dst_strd,
//                                           WORD32 ui_neighboravailability)

//**************Variables Vs Registers*****************************************
//      x0 => *pu1_src
//      x1 => *pu1_dst
//      w2 => src_strd                  (not read by this routine)
//      w3 => dst_strd
//      w4 => ui_neighboravailability   (not read by this routine)
//
// pu1_src[0..7] is filtered into a FILT11 vector (v4, rounded two-sample
// average) and a FILT121 vector (v6, rounded 1-2-1 filter). Sixteen output
// bytes are then gathered from those 32 candidates with TBL, using the
// sixteen index bytes stored at ih264_gai1_intrapred_luma_8x8_horz_u
// (indices 0..15 select from FILT11, 16..31 from FILT121). Row r (r = 0..7)
// is the 8-byte window starting at byte 2*r of that gathered vector, padded
// on the right with pu1_src[0].
// NOTE(review): the index table is defined outside this file; the lanes it
// selects cannot be checked from here.
// NOTE(review): push_v_regs/pop_v_regs come from ih264_neon_macros.s;
// presumably they preserve the callee-saved SIMD registers (v10-v14 are
// written here) - confirm against the macro file.

    .global ih264_intra_pred_luma_8x8_mode_horz_u_av8

ih264_intra_pred_luma_8x8_mode_horz_u_av8:

    push_v_regs
    sxtw      x3, w3                    // dst_strd is a 32-bit int; widen it for post-index addressing

    ld1       {v0.8b}, [x0]             // v0 lanes 0..7 = pu1_src[0..7]
    ld1r      {v1.8b}, [x0]             // v1 = pu1_src[0] in every lane, so no stale register
                                        // contents enter the arithmetic below
    mov       v0.d[1], v1.d[0]          // v0 lanes 8..15 = pu1_src[0]
    ext       v2.16b, v0.16b, v0.16b, #1 // v0 shifted by one sample; lane 15 wraps to pu1_src[0]
    mov       v3.d[0], v2.d[1]
    ext       v4.16b, v2.16b, v2.16b, #1 // v0 shifted by two samples; lane 15 wraps to pu1_src[1]
    mov       v5.d[0], v4.d[1]

    adrp      x12, :got:ih264_gai1_intrapred_luma_8x8_horz_u
    ldr       x12, [x12, #:got_lo12:ih264_gai1_intrapred_luma_8x8_horz_u]
    uaddl     v20.8h, v0.8b, v2.8b      // a[i]   + a[i+1], lanes 0..7
    uaddl     v22.8h, v1.8b, v3.8b      // a[i]   + a[i+1], lanes 8..15
    uaddl     v24.8h, v2.8b, v4.8b      // a[i+1] + a[i+2], lanes 0..7
    uaddl     v26.8h, v3.8b, v5.8b      // a[i+1] + a[i+2], lanes 8..15
    add       v24.8h, v20.8h, v24.8h    // a[i] + 2*a[i+1] + a[i+2]
    add       v26.8h, v22.8h, v26.8h
    ld1       {v10.16b}, [x12]          // sixteen TBL index bytes
    mov       v11.d[0], v10.d[1]        // upper eight indices into their own register
    sqrshrun  v4.8b, v20.8h, #1
    sqrshrun  v5.8b, v22.8h, #1         // lane 7 = average of pu1_src[0] with itself = pu1_src[0]
    mov       v4.d[1], v5.d[0]
    sqrshrun  v6.8b, v24.8h, #2
    sqrshrun  v7.8b, v26.8h, #2
    mov       v6.d[1], v7.d[0]
    //v4 has all FILT11 values
    //v6 has all FILT121 values
    mov       v30.16b, v4.16b           // TBL needs its two table registers to be consecutive
    mov       v31.16b, v6.16b
    tbl       v12.8b, {v30.16b, v31.16b}, v10.8b // gathered bytes 0..7
    dup       v14.16b, v5.b[7]          // padding vector: pu1_src[0] in every lane
    tbl       v13.8b, {v30.16b, v31.16b}, v11.8b // gathered bytes 8..15
    mov       v12.d[1], v13.d[0]        // v12 = gathered bytes 0..15
    ext       v16.16b, v12.16b, v14.16b, #2 // window starting at byte 2
    ext       v18.16b, v16.16b, v14.16b, #2 // window starting at byte 4
    st1       {v12.8b}, [x1], x3        //0
    ext       v20.16b, v18.16b, v14.16b, #2 // window starting at byte 6
    st1       {v16.8b}, [x1], x3        //1
    st1       {v18.8b}, [x1], x3        //2
    st1       {v20.8b}, [x1], x3        //3
    st1       {v13.8b}, [x1], x3        //4
    st1       {v16.d}[1], [x1], x3      //5
    st1       {v18.d}[1], [x1], x3      //6
    st1       {v20.d}[1], [x1], x3      //7

    pop_v_regs
    ret