@/*****************************************************************************
@*
@* Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
@*
@* Licensed under the Apache License, Version 2.0 (the "License");
@* you may not use this file except in compliance with the License.
@* You may obtain a copy of the License at:
@*
@* http://www.apache.org/licenses/LICENSE-2.0
@*
@* Unless required by applicable law or agreed to in writing, software
@* distributed under the License is distributed on an "AS IS" BASIS,
@* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@* See the License for the specific language governing permissions and
@* limitations under the License.
@*
@*****************************************************************************/
@/**
@/*******************************************************************************
@* @file
@*  ihevcd_itrans_recon_dc_chroma.s
@*
@* @brief
@*  contains function definitions itrans and recon for dc only case
@*
@* @author
@*  ittiam
@*
@* @par list of functions:
@*  - ihevcd_itrans_recon_dc_chroma_a9q()
@*
@* @remarks
@*  none
@*
@*******************************************************************************/

.text


.globl ihevcd_itrans_recon_dc_chroma_a9q

.type ihevcd_itrans_recon_dc_chroma_a9q, %function

@-------------------------------------------------------------------------------
@ void ihevcd_itrans_recon_dc_chroma(uword8 *pu1_pred,
@                                    uword8 *pu1_dst,
@                                    word32 pred_strd,
@                                    word32 dst_strd,
@                                    word32 log2_trans_size,
@                                    word16 i2_coeff_value)
@
@ Purpose:
@   Derives one dc value from i2_coeff_value and adds it, with saturation to
@   0..255, to every even-offset byte of a block read from pu1_pred. The result
@   is written to pu1_dst. Odd-offset bytes are copied from pu1_pred to pu1_dst
@   unchanged. The block is trans_size rows of 2 * trans_size bytes, where
@   trans_size = 1 << log2_trans_size.
@   NOTE(review): presumably the rows hold byte-interleaved chroma samples and
@   this routine handles the plane at even offsets - confirm against callers.
@
@   dc_value = ssat16((ssat16((coeff * 64 + 64) >> 7) * 64 + 2048) >> 12)
@
@ Inputs (32-bit ARM, arguments 5 and 6 are read from the caller stack):
@   r0       : pu1_pred
@   r1       : pu1_dst
@   r2       : pred_strd  (bytes between rows of pu1_pred)
@   r3       : dst_strd   (bytes between rows of pu1_dst)
@   [sp, #0] : log2_trans_size   (offset at entry)
@   [sp, #4] : i2_coeff_value    (offset at entry, read as a full word)
@
@ Register roles inside the function:
@   r4  : trans_size
@   r6  : dc_value
@   r8  : rows still to process     (8-row path)
@   r9  : columns still to process  (8-row path)
@   r7  : running pred row pointer / scratch
@   r11 : running dst row pointer
@   q0  : dc_value replicated into eight s16 lanes
@
@ Paths:
@   trans_size == 4 : one straight-line 4-row pass, 8 bytes per row.
@   otherwise       : tiles of 8 rows x 16 bytes, looped over columns and rows.
@
@ All general registers and d8-d15 are restored on exit. d0-d7 and d16-d31 are
@ used as scratch and not restored.
@-------------------------------------------------------------------------------

ihevcd_itrans_recon_dc_chroma_a9q:

    push        {r0-r11,lr}             @ 13 words  = 0x34 bytes
    vpush       {d8-d15}                @ 8 dwords  = 0x40 bytes (d8-d15 are written by the 8-row path)
    ldr         r4,[sp,#0x74]           @ log2_trans_size: first stack argument, 0x34 + 0x40 above sp
    ldr         r5,[sp,#0x78]           @ i2_coeff_value:  second stack argument
    mov         r10,#1
    lsl         r4,r10,r4               @ trans_size = (1 << log2_trans_size)
    mov         r6,#64                  @ 1 << (shift1 - 1), shift1 = 7
    mov         r7,#2048                @ 1 << (shift2 - 1), shift2 = 12

    add         r8,r6,r5,lsl #6         @ coeff * 64 + 64
    ssat        r8,#16,r8,asr #7        @ first stage result, saturated to s16
    add         r5,r7,r8,lsl #6         @ stage1 * 64 + 2048
    ssat        r6,#16,r5,asr #12       @ r6 = dc_value, saturated to s16
    mov         r8,r4                   @ row counter = trans_size

    @ r6 has the dc_value
    @ r4 has the trans_size value
    @ r8 has the row value
    vdup.s16    q0,r6
    cmp         r4,#4
    beq         block_4x4_chroma


row_loop_chroma:
    mov         r9,r4                   @ column counter restarts for every band of 8 rows


col_loop_chroma:

    @ Load 8 rows x 16 bytes, de-interleaved: even bytes land in the first
    @ register of each pair, odd bytes in the second.
    mov         r7,r0
    vld2.8      {d2,d3},[r7],r2
    vld2.8      {d4,d5},[r7],r2
    vld2.8      {d6,d7},[r7],r2
    vld2.8      {d8,d9},[r7],r2

    vld2.8      {d10,d11},[r7],r2
    vld2.8      {d12,d13},[r7],r2
    vld2.8      {d14,d15},[r7],r2
    vld2.8      {d16,d17},[r7]

    add         r0,r0,#16               @ next tile of 16 bytes in the same band


    @ Widen the even bytes to 16 bit and add the dc value.
    vaddw.u8    q15,q0,d2
    vaddw.u8    q14,q0,d4
    vaddw.u8    q13,q0,d6
    vaddw.u8    q12,q0,d8
    vaddw.u8    q11,q0,d10
    vaddw.u8    q10,q0,d12
    vaddw.u8    q9,q0,d14


    @ Narrow back to bytes with unsigned saturation (clip to 0..255).
    mov         r11,r1
    vqmovun.s16 d2,q15
    vqmovun.s16 d4,q14
    vqmovun.s16 d6,q13
    vqmovun.s16 d8,q12

    vaddw.u8    q15,q0,d16              @ eighth row reuses q15 once its first result is narrowed

    vqmovun.s16 d10,q11
    vqmovun.s16 d12,q10
    vqmovun.s16 d14,q9
    vqmovun.s16 d16,q15

    @ Re-interleave on store: updated even bytes plus the untouched odd bytes.
    vst2.8      {d2,d3},[r11],r3
    vst2.8      {d4,d5},[r11],r3
    vst2.8      {d6,d7},[r11],r3
    vst2.8      {d8,d9},[r11],r3

    vst2.8      {d10,d11},[r11],r3
    vst2.8      {d12,d13},[r11],r3
    vst2.8      {d14,d15},[r11],r3
    vst2.8      {d16,d17},[r11]

    add         r1,r1,#16

    subs        r9,r9,#8                @ 8 byte pairs handled per tile
    bgt         col_loop_chroma

    subs        r8,r8,#8                @ sets the flags tested by the bgt below

    @ The four instructions below do not update the flags. They move both
    @ pointers down 8 rows and back to the first column of the block.
    add         r0,r0,r2,lsl #3
    add         r1,r1,r3,lsl #3
    sub         r0,r0,r4,lsl #1
    sub         r1,r1,r4,lsl #1
    bgt         row_loop_chroma
    b           end_loops_chroma


block_4x4_chroma:

    @ Each row is exactly 8 bytes, so load exactly 8 bytes per row. The dc
    @ value is placed in the even s16 lanes only and the odd lanes hold zero,
    @ so the widen/add/saturate sequence changes the even bytes and passes the
    @ odd bytes through unmodified (a byte plus zero saturates to itself).
    uxth        r7,r6                   @ r7 = dc_value in bits 15:0, zero in bits 31:16
    vdup.32     q8,r7                   @ s16 lanes of q8 = dc,0,dc,0,dc,0,dc,0

    vld1.8      {d2},[r0],r2
    vld1.8      {d3},[r0],r2
    vld1.8      {d4},[r0],r2
    vld1.8      {d5},[r0]

    vaddw.u8    q15,q8,d2
    vaddw.u8    q14,q8,d3
    vaddw.u8    q13,q8,d4
    vaddw.u8    q12,q8,d5

    vqmovun.s16 d2,q15
    vqmovun.s16 d3,q14
    vqmovun.s16 d4,q13
    vqmovun.s16 d5,q12

    vst1.8      {d2},[r1],r3
    vst1.8      {d3},[r1],r3
    vst1.8      {d4},[r1],r3
    vst1.8      {d5},[r1]

end_loops_chroma:
    vpop        {d8-d15}
    pop         {r0-r11,pc}             @ restores r0-r11 and returns by loading the saved lr into pc


