///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* @file
//*  ihevc_deblk_chroma_vert.s
//*
//* @brief
//*  contains function definitions for deblocking of vertical chroma edges.
//*  functions are coded in armv8 neon assembly using gnu assembler directives
//*
//* @author
//*  anand s
//*
//* @par list of functions:
//*  - ihevc_deblk_chroma_vert_av8()
//*
//* @remarks
//*  none
//*
//*******************************************************************************/
//void ihevc_deblk_chroma_vert(UWORD8 *pu1_src,
//                             WORD32 src_strd,
//                             WORD32 quant_param_p,
//                             WORD32 quant_param_q,
//                             WORD32 qp_offset_u,
//                             WORD32 qp_offset_v,
//                             WORD32 tc_offset_div2,
//                             WORD32 filter_flag_p,
//                             WORD32 filter_flag_q)
//
// Register usage (AArch64 procedure call standard)
//   on entry : x0 = pu1_src          w1 = src_strd        w2 = quant_param_p
//              w3 = quant_param_q    w4 = qp_offset_u     w5 = qp_offset_v
//              w6 = tc_offset_div2   w7 = filter_flag_p   [sp] = filter_flag_q
//   after the argument marshalling at the top of the function:
//              x1 = src_strd         x2 = quant_param_p   x3 = quant_param_q
//              x7 = qp_offset_u      x6 = qp_offset_v     x5 = tc_offset_div2
//              w12 = filter_flag_p   w4 = filter_flag_q
//   scratch  : x8, x15, x20, v0-v6, v16-v18, v29-v31
//   saved    : x19/x20 are spilled to the stack; push_v_regs/pop_v_regs come from
//              ihevc_neon_macros.s (not visible here - presumably they save and
//              restore the callee-saved simd registers; confirm in the include)
//
// The 32-bit arguments are widened explicitly because the upper 32 bits of an
// argument register holding a 32-bit value are not guaranteed by the caller.
// Rows pu1_src[-4 .. 3] of four consecutive lines are read; two bytes on each
// side of pu1_src are conditionally rewritten on each of those four lines.

.text
.align 4
.include "ihevc_neon_macros.s"

.extern gai4_ihevc_qp_table
.extern gai4_ihevc_tc_table
.globl ihevc_deblk_chroma_vert_av8

.type ihevc_deblk_chroma_vert_av8, %function

// Limits used by the index computations below.
.equ CHROMA_QP_IDX_MAX, 57      // largest index looked up in gai4_ihevc_qp_table
.equ CHROMA_QP_IDX_ADJ, 6       // subtracted from indices above CHROMA_QP_IDX_MAX
.equ TC_IDX_BIAS,       2       // constant added to the tc index before clamping
.equ TC_IDX_MAX,        53      // tc index is clamped to [0, TC_IDX_MAX]

ihevc_deblk_chroma_vert_av8:
    // ---- widen and re-home the arguments ------------------------------------
    sxtw        x1, w1                      // src_strd (used as 64-bit post-index)
    sxtw        x2, w2                      // quant_param_p
    sxtw        x3, w3                      // quant_param_q
    mov         w12, w7                     // filter_flag_p, only tested for zero
    sxtw        x7, w4                      // qp_offset_u
    sxtw        x15, w5                     // qp_offset_v, parked while x5 is reused
    sxtw        x5, w6                      // tc_offset_div2
    mov         x6, x15                     // qp_offset_v
    ldr         w4, [sp]                    // filter_flag_q: must be read before sp moves

    push_v_regs
    stp         x19, x20, [sp, #-16]!       // x20 is used as scratch below

    // ---- load 4 rows x 8 bytes starting at pu1_src - 4 ----------------------
    sub         x8, x0, #4
    add         x2, x2, x3
    ld1         {v5.8b}, [x8], x1
    add         x2, x2, #1                  // x2 = quant_param_p + quant_param_q + 1
    ld1         {v17.8b}, [x8], x1
    ld1         {v16.8b}, [x8], x1
    ld1         {v4.8b}, [x8]

    // ---- transpose stage 1 (bytes), interleaved with the u qp index ---------
    trn1        v29.8b, v5.8b, v17.8b
    trn2        v17.8b, v5.8b, v17.8b
    mov         v5.d[0], v29.d[0]
    adds        x3, x7, x2, asr #1          // x3 = qp_offset_u + (x2 >> 1); N flag feeds b.mi
    trn1        v29.8b, v16.8b, v4.8b
    trn2        v4.8b, v16.8b, v4.8b
    mov         v16.d[0], v29.d[0]
    adrp        x7, :got:gai4_ihevc_qp_table
    ldr         x7, [x7, #:got_lo12:gai4_ihevc_qp_table]

    // x3: negative -> kept as is, 0..57 -> table lookup, above 57 -> x3 - 6
    b.mi        .Ldeblk_chroma_vert_qp_u_ready
    cmp         x3, #CHROMA_QP_IDX_MAX
    b.gt        .Ldeblk_chroma_vert_qp_u_above
    ldrsw       x3, [x7, x3, lsl #2]
.Ldeblk_chroma_vert_qp_u_above:
    sub         x20, x3, #CHROMA_QP_IDX_ADJ
    csel        x3, x20, x3, gt             // flags are still those of the cmp above
.Ldeblk_chroma_vert_qp_u_ready:

    // ---- transpose stage 2 (halfwords), interleaved with the v qp index -----
    trn1        v29.4h, v5.4h, v16.4h
    trn2        v16.4h, v5.4h, v16.4h
    mov         v5.d[0], v29.d[0]
    adds        x2, x6, x2, asr #1          // x2 = qp_offset_v + (x2 >> 1); N flag feeds b.mi
    trn1        v29.4h, v17.4h, v4.4h
    trn2        v4.4h, v17.4h, v4.4h
    mov         v17.d[0], v29.d[0]

    // same mapping as for x3 above
    b.mi        .Ldeblk_chroma_vert_qp_v_ready
    cmp         x2, #CHROMA_QP_IDX_MAX
    b.gt        .Ldeblk_chroma_vert_qp_v_above
    ldrsw       x2, [x7, x2, lsl #2]
.Ldeblk_chroma_vert_qp_v_above:
    sub         x20, x2, #CHROMA_QP_IDX_ADJ
    csel        x2, x20, x2, gt             // flags are still those of the cmp above
.Ldeblk_chroma_vert_qp_v_ready:

    // ---- transpose stage 3 (words), interleaved with the u tc index ---------
    // After this stage v5/v16 hold the two columns left of pu1_src and v17/v4
    // the two columns at and right of pu1_src (v16 and v17 are the inner pair).
    mov         x20, #TC_IDX_MAX            // upper clamp bound, reused for both indices
    trn1        v29.2s, v5.2s, v17.2s
    trn2        v17.2s, v5.2s, v17.2s
    mov         v5.d[0], v29.d[0]
    add         x3, x3, x5, lsl #1          // + 2 * tc_offset_div2
    trn1        v29.2s, v16.2s, v4.2s
    trn2        v4.2s, v16.2s, v4.2s
    mov         v16.d[0], v29.d[0]
    adds        x3, x3, #TC_IDX_BIAS
    csel        x3, x3, xzr, pl             // max(index, 0)
    uxtl        v18.8h, v17.8b              // widened right-inner column
    cmp         x3, #TC_IDX_MAX
    csel        x3, x3, x20, le             // min(index, TC_IDX_MAX)

    // ---- delta = ((right_inner - left_inner) * 4 + left_outer - right_outer + 4) >> 3
    usubl       v0.8h, v17.8b, v16.8b
    adrp        x6, :got:gai4_ihevc_tc_table
    ldr         x6, [x6, #:got_lo12:gai4_ihevc_tc_table]
    shl         v0.8h, v0.8h, #2
    add         x2, x2, x5, lsl #1          // + 2 * tc_offset_div2
    uaddw       v0.8h, v0.8h, v5.8b
    adds        x2, x2, #TC_IDX_BIAS
    ldrsw       x3, [x6, x3, lsl #2]        // tc for lanes 0-3 (from qp_offset_u path)
    usubw       v4.8h, v0.8h, v4.8b
    csel        x2, x2, xzr, pl             // max(index, 0); ldrsw/usubw leave flags intact
    cmp         x2, #TC_IDX_MAX
    csel        x2, x2, x20, le             // min(index, TC_IDX_MAX)

    srshr       v6.8h, v4.8h, #3            // rounding shift supplies the "+ 4"
    dup         v2.4h, w3                   // +tc, lanes 0-3
    ldrsw       x2, [x6, x2, lsl #2]        // tc for lanes 4-7 (from qp_offset_v path)
    neg         x3, x3
    dup         v3.4h, w2                   // +tc, lanes 4-7
    neg         x2, x2
    dup         v30.4h, w3                  // -tc, lanes 0-3
    dup         v31.4h, w2                  // -tc, lanes 4-7

    mov         v30.d[1], v31.d[0]          // v30 = lower clip bound for all 8 lanes
    mov         v2.d[1], v3.d[0]            // v2  = upper clip bound for all 8 lanes

    smin        v4.8h, v6.8h, v2.8h
    smax        v2.8h, v30.8h, v4.8h        // v2 = clip(delta, -tc, +tc)

    uxtl        v6.8h, v16.8b               // widened left-inner column

    add         v0.8h, v6.8h, v2.8h         // left-inner  + delta
    sub         v2.8h, v18.8h, v2.8h        // right-inner - delta
    sqxtun      v0.8b, v0.8h                // saturate back to unsigned bytes
    sub         x2, x0, #2                  // destination of the left-side pair
    sqxtun      v1.8b, v2.8h

    // Re-interleave so that every halfword lane holds the two bytes written
    // to one row: lanes 0-1 left of pu1_src, lanes 2-3 at pu1_src.
    trn1        v29.2s, v0.2s, v1.2s
    trn2        v1.2s, v0.2s, v1.2s
    mov         v0.d[0], v29.d[0]
    trn1        v29.8b, v0.8b, v1.8b
    trn2        v1.8b, v0.8b, v1.8b
    mov         v0.d[0], v29.d[0]

    // ---- conditional write-back ---------------------------------------------
    cbz         w12, .Ldeblk_chroma_vert_skip_p     // filter_flag_p == 0
    st1         {v0.h}[0], [x2], x1
    st1         {v1.h}[0], [x2], x1
    st1         {v0.h}[1], [x2], x1
    st1         {v1.h}[1], [x2]
.Ldeblk_chroma_vert_skip_p:
    cbz         w4, .Ldeblk_chroma_vert_done        // filter_flag_q == 0
    st1         {v0.h}[2], [x0], x1
    st1         {v1.h}[2], [x0], x1
    st1         {v0.h}[3], [x0], x1
    st1         {v1.h}[3], [x0]
.Ldeblk_chroma_vert_done:
    ldp         x19, x20, [sp], #16
    pop_v_regs
    ret