///*****************************************************************************
//*
//* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
//*
//* Licensed under the Apache License, Version 2.0 (the "License");
//* you may not use this file except in compliance with the License.
//* You may obtain a copy of the License at:
//*
//* http://www.apache.org/licenses/LICENSE-2.0
//*
//* Unless required by applicable law or agreed to in writing, software
//* distributed under the License is distributed on an "AS IS" BASIS,
//* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
//* See the License for the specific language governing permissions and
//* limitations under the License.
//*
//*****************************************************************************/
///**
///*******************************************************************************
//* //file
//*  ihevcd_itrans_recon_dc_luma.s
//*
//* //brief
//*  contains function definitions itrans and recon for dc only case
//*
//* //author
//*  ittiam
//*
//* //par list of functions:
//*  - ihevcd_itrans_recon_dc_luma_av8()
//*
//* //remarks
//*  none
//*
//*******************************************************************************/

.text
.include "ihevc_neon_macros.s"



.globl ihevcd_itrans_recon_dc_luma_av8

.type ihevcd_itrans_recon_dc_luma_av8, %function

//------------------------------------------------------------------------------
// void ihevcd_itrans_recon_dc_luma(uword8 *pu1_pred,
//                                  uword8 *pu1_dst,
//                                  word32 pred_strd,
//                                  word32 dst_strd,
//                                  word32 log2_trans_size,
//                                  word16 i2_coeff_value)
//
// Derives a single dc value from i2_coeff_value, adds it to every prediction
// sample of a trans_size x trans_size block (trans_size = 1 << log2_trans_size)
// and stores the result, saturated to 8 bits, to the destination block.
//
// In:
//   x0 : pu1_pred
//   x1 : pu1_dst
//   w2 : pred_strd          (sign-extended to x2 on entry)
//   w3 : dst_strd           (sign-extended to x3 on entry)
//   w4 : log2_trans_size
//   w5 : i2_coeff_value     (sign-extended from 16 bits on entry)
//
// Working registers:
//   x4  : trans_size
//   x6  : dc_value (after both clip stages)
//   x8  : rows remaining
//   x9  : columns remaining in the current band of 8 rows
//   x7, x11 : running row pointers inside an 8x8 tile
//   v0  : dc_value replicated in all eight 16-bit lanes
//
// Clobbers: x0-x11, v0-v7, v16-v18, v20, v22, v24, v26, v28, v30, flags.
// x19/x20 are used as scratch and are saved/restored on the stack.
//
// Block sizes other than 4 are processed in 8x8 tiles, so trans_size is
// expected to be 4 or a multiple of 8.
//------------------------------------------------------------------------------
ihevcd_itrans_recon_dc_luma_av8:

    stp         x19, x20,[sp,#-16]!

    // The strides arrive as 32-bit values; the upper halves of x2/x3 are not
    // guaranteed by the procedure call standard, so extend before using them
    // as 64-bit address increments.
    sxtw        x2,w2
    sxtw        x3,w3
    sxth        x5,w5

    mov         x10,#1
    lsl         x4,x10,x4                   // trans_size = (1 << log2_trans_size)
    mov         x6,#64                      // 1 << (shift1 - 1)
    mov         x7,#2048                    // 1 << (shift2 - 1)

    // stage 1: x8 = clip_s16((i2_coeff_value * 64 + 64) >> 7)
    add         x8,x6,x5,lsl #6
    asr         x20,x8,#7
    mov         x19,#32767
    cmp         x20,x19
    csel        x20,x19,x20,gt              // upper bound
    mov         x19,#-32768
    cmp         x20,x19
    csel        x8,x19,x20,lt               // lower bound

    // stage 2: x6 = clip_s16((stage1 * 64 + 2048) >> 12)
    add         x5,x7,x8,lsl #6
    asr         x20,x5,#12
    mov         x19,#32767
    cmp         x20,x19
    csel        x20,x19,x20,gt              // upper bound
    mov         x19,#-32768
    cmp         x20,x19
    csel        x6,x19,x20,lt               // lower bound

    mov         x8,x4                       // rows remaining = trans_size

    // x6 has the dc_value
    // x4 has the trans_size value
    // x8 has the row value
    // x9 has the col value (set at the top of every row band)
    dup         v0.8h,w6
    cmp         x4,#4
    beq         row_loop_4


row_loop:
    mov         x9,x4                       // columns remaining = trans_size


col_loop:

    // load one 8x8 tile of prediction samples
    mov         x7,x0
    ld1         {v2.8b},[x7],x2
    ld1         {v3.8b},[x7],x2
    ld1         {v4.8b},[x7],x2
    ld1         {v5.8b},[x7],x2

    ld1         {v6.8b},[x7],x2
    ld1         {v7.8b},[x7],x2
    ld1         {v1.8b},[x7],x2
    ld1         {v17.8b},[x7]

    add         x0,x0,#8                    // next tile to the right


    // pred (widened to 16 bits) + dc_value
    uaddw       v30.8h, v0.8h , v2.8b
    uaddw       v28.8h, v0.8h , v3.8b
    uaddw       v26.8h, v0.8h , v4.8b
    uaddw       v24.8h, v0.8h , v5.8b
    uaddw       v22.8h, v0.8h , v6.8b
    uaddw       v20.8h, v0.8h , v7.8b
    uaddw       v18.8h, v0.8h , v1.8b
    uaddw       v16.8h, v0.8h , v17.8b

    // narrow back to 8 bits with unsigned saturation
    mov         x11,x1
    sqxtun      v2.8b, v30.8h
    sqxtun      v3.8b, v28.8h
    sqxtun      v4.8b, v26.8h
    sqxtun      v5.8b, v24.8h
    sqxtun      v6.8b, v22.8h
    sqxtun      v7.8b, v20.8h
    sqxtun      v1.8b, v18.8h
    sqxtun      v17.8b, v16.8h


    // store the 8x8 tile
    st1         {v2.2s},[x11],x3
    st1         {v3.2s},[x11],x3
    st1         {v4.2s},[x11],x3
    st1         {v5.2s},[x11],x3
    st1         {v6.2s},[x11],x3
    st1         {v7.2s},[x11],x3
    st1         {v1.2s},[x11],x3
    st1         {v17.2s},[x11]

    add         x1,x1,#8

    subs        x9,x9,#8
    bgt         col_loop

    // move both pointers to the start of the next band of 8 rows:
    // down 8 rows, back by the trans_size columns just walked
    add         x0,x0,x2,lsl #3
    add         x1,x1,x3,lsl #3
    sub         x0,x0,x4
    sub         x1,x1,x4

    subs        x8,x8,#8
    bgt         row_loop
    b           end_loops


row_loop_4:

col_loop_4:

    // 4x4 block: load exactly 4 bytes per row so nothing beyond the block's
    // 4-sample width is read. The upper lanes of v2-v5 are not stored.
    ld1         {v2.s}[0],[x0],x2
    ld1         {v3.s}[0],[x0],x2
    ld1         {v4.s}[0],[x0],x2
    ld1         {v5.s}[0],[x0]


    uaddw       v30.8h, v0.8h , v2.8b
    uaddw       v28.8h, v0.8h , v3.8b
    uaddw       v26.8h, v0.8h , v4.8b
    uaddw       v24.8h, v0.8h , v5.8b


    sqxtun      v2.8b, v30.8h
    sqxtun      v3.8b, v28.8h
    sqxtun      v4.8b, v26.8h
    sqxtun      v5.8b, v24.8h


    // only the low 4 bytes of each row are written
    st1         {v2.s}[0],[x1],x3
    st1         {v3.s}[0],[x1],x3
    st1         {v4.s}[0],[x1],x3
    st1         {v5.s}[0],[x1]

end_loops:
    ldp         x19, x20,[sp],#16

    ret