1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_chroma_itrans_recon_8x8.c
22  *
23  * @brief
24  *  Contains function definitions for 8x8 inverse transform  and reconstruction
25  * of chroma interleaved data.
26  *
27  * @author
28  *  100470
29  *
30  * @par List of Functions:
31  *  - ihevc_chroma_itrans_recon_8x8()
32  *
33  * @remarks
34  *  None
35  *
36  *******************************************************************************
37  */
38 
39 #include <stdio.h>
40 #include <string.h>
41 #include "ihevc_typedefs.h"
42 #include "ihevc_macros.h"
43 #include "ihevc_platform_macros.h"
44 #include "ihevc_defs.h"
45 #include "ihevc_trans_tables.h"
46 #include "ihevc_chroma_itrans_recon.h"
47 #include "ihevc_func_selector.h"
48 #include "ihevc_trans_macros.h"
49 
50 /* All the functions work on one component (U or V) of interleaved data, depending upon the pointers passed to them */
51 /* Data visualization */
52 /* U V U V U V U V */
53 /* U V U V U V U V */
54 /* U V U V U V U V */
55 /* U V U V U V U V */
56 /* If the pointer points to first byte of above stream (U) , functions will operate on U component */
57 /* If the pointer points to second byte of above stream (V) , functions will operate on V component */
58 
59 /**
60  *******************************************************************************
61  *
62  * @brief
63  *  This function performs Inverse transform  and reconstruction for 8x8
64  * input block
65  *
66  * @par Description:
67  *  Performs inverse transform and adds the prediction  data and clips output
68  * to 8 bit
69  *
70  * @param[in] pi2_src
71  *  Input 8x8 coefficients
72  *
73  * @param[in] pi2_tmp
74  *  Temporary 8x8 buffer for storing inverse transform
75  *  1st stage output
76  *
77  * @param[in] pu1_pred
78  *  Prediction 8x8 block
79  *
80  * @param[out] pu1_dst
81  *  Output 8x8 block
82  *
83  * @param[in] src_strd
84  *  Input stride
85  *
86  * @param[in] pred_strd
87  *  Prediction stride
88  *
89  * @param[in] dst_strd
90  *  Output Stride
91  *
92  * @param[in] shift
93  *  Output shift
94  *
95  * @param[in] zero_cols
96  *  Zero columns in pi2_src
97  *
98  * @returns  Void
99  *
100  * @remarks
101  *  None
102  *
103  *******************************************************************************
104  */
105 
106 
ihevc_chroma_itrans_recon_8x8(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)107 void ihevc_chroma_itrans_recon_8x8(WORD16 *pi2_src,
108                                    WORD16 *pi2_tmp,
109                                    UWORD8 *pu1_pred,
110                                    UWORD8 *pu1_dst,
111                                    WORD32 src_strd,
112                                    WORD32 pred_strd,
113                                    WORD32 dst_strd,
114                                    WORD32 zero_cols,
115                                    WORD32 zero_rows)
116 {
117     WORD32 j, k;
118     WORD32 e[4], o[4];
119     WORD32 ee[2], eo[2];
120     WORD32 add;
121     WORD32 shift;
122     WORD16 *pi2_tmp_orig;
123     WORD32 trans_size;
124     WORD32 zero_rows_2nd_stage = zero_cols;
125     WORD32 row_limit_2nd_stage;
126     UNUSED(zero_rows);
127     trans_size = TRANS_SIZE_8;
128 
129     pi2_tmp_orig = pi2_tmp;
130 
131     if((zero_cols & 0xF0) == 0xF0)
132         row_limit_2nd_stage = 4;
133     else
134         row_limit_2nd_stage = TRANS_SIZE_8;
135 
136     /* Inverse Transform 1st stage */
137     shift = IT_SHIFT_STAGE_1;
138     add = 1 << (shift - 1);
139     {
140         /************************************************************************************************/
141         /**********************************START - IT_RECON_8x8******************************************/
142         /************************************************************************************************/
143 
144         for(j = 0; j < row_limit_2nd_stage; j++)
145         {
146             /* Checking for Zero Cols */
147             if((zero_cols & 1) == 1)
148             {
149                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
150             }
151             else
152             {
153                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
154                 for(k = 0; k < 4; k++)
155                 {
156                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
157                                     + g_ai2_ihevc_trans_8[3][k]
158                                                     * pi2_src[3 * src_strd]
159                                     + g_ai2_ihevc_trans_8[5][k]
160                                                     * pi2_src[5 * src_strd]
161                                     + g_ai2_ihevc_trans_8[7][k]
162                                                     * pi2_src[7 * src_strd];
163                 }
164 
165                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
166                                 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
167                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
168                                 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
169                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
170                                 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
171                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
172                                 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
173 
174                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
175                 e[0] = ee[0] + eo[0];
176                 e[3] = ee[0] - eo[0];
177                 e[1] = ee[1] + eo[1];
178                 e[2] = ee[1] - eo[1];
179                 for(k = 0; k < 4; k++)
180                 {
181                     pi2_tmp[k] =
182                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
183                     pi2_tmp[k + 4] =
184                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
185                 }
186             }
187             pi2_src++;
188             pi2_tmp += trans_size;
189             zero_cols = zero_cols >> 1;
190         }
191 
192         pi2_tmp = pi2_tmp_orig;
193 
194         /* Inverse Transform 2nd stage */
195         shift = IT_SHIFT_STAGE_2;
196         add = 1 << (shift - 1);
197 
198         if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
199         {
200             for(j = 0; j < trans_size; j++)
201             {
202                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
203                 for(k = 0; k < 4; k++)
204                 {
205                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
206                                     + g_ai2_ihevc_trans_8[3][k]
207                                                     * pi2_tmp[3 * trans_size];
208                 }
209                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
210                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
211                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
212                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
213 
214                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
215                 e[0] = ee[0] + eo[0];
216                 e[3] = ee[0] - eo[0];
217                 e[1] = ee[1] + eo[1];
218                 e[2] = ee[1] - eo[1];
219                 for(k = 0; k < 4; k++)
220                 {
221                     WORD32 itrans_out;
222                     itrans_out =
223                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
224                     pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
225                     itrans_out =
226                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
227                     pu1_dst[(k + 4) * 2] =
228                                     CLIP_U8((itrans_out + pu1_pred[(k + 4) * 2]));
229                 }
230                 pi2_tmp++;
231                 pu1_pred += pred_strd;
232                 pu1_dst += dst_strd;
233             }
234         }
235         else /* All rows of output of 1st stage are non-zero */
236         {
237             for(j = 0; j < trans_size; j++)
238             {
239                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
240                 for(k = 0; k < 4; k++)
241                 {
242                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
243                                     + g_ai2_ihevc_trans_8[3][k]
244                                                     * pi2_tmp[3 * trans_size]
245                                     + g_ai2_ihevc_trans_8[5][k]
246                                                     * pi2_tmp[5 * trans_size]
247                                     + g_ai2_ihevc_trans_8[7][k]
248                                                     * pi2_tmp[7 * trans_size];
249                 }
250 
251                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
252                                 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
253                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
254                                 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
255                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
256                                 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
257                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
258                                 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
259 
260                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
261                 e[0] = ee[0] + eo[0];
262                 e[3] = ee[0] - eo[0];
263                 e[1] = ee[1] + eo[1];
264                 e[2] = ee[1] - eo[1];
265                 for(k = 0; k < 4; k++)
266                 {
267                     WORD32 itrans_out;
268                     itrans_out =
269                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
270                     pu1_dst[k * 2] = CLIP_U8((itrans_out + pu1_pred[k * 2]));
271                     itrans_out =
272                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
273                     pu1_dst[(k + 4) * 2] =
274                                     CLIP_U8((itrans_out + pu1_pred[(k + 4) * 2]));
275                 }
276                 pi2_tmp++;
277                 pu1_pred += pred_strd;
278                 pu1_dst += dst_strd;
279             }
280         }
281         /************************************************************************************************/
282         /************************************END - IT_RECON_8x8******************************************/
283         /************************************************************************************************/
284     }
285 }
286