1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
18 /**
19  *******************************************************************************
20  * @file
21  *  ihevc_itrans_recon.c
22  *
23  * @brief
24  *  Contains function definitions for inverse transform  and reconstruction
25  *
26  *
27  * @author
28  *  100470
29  *
30  * @par List of Functions:
31  *  - ihevc_itrans_recon_4x4_ttype1()
32  *  - ihevc_itrans_recon_4x4()
33  *
34  * @remarks
35  *  None
36  *
37  *******************************************************************************
38  */
39 #include <stdio.h>
40 #include <string.h>
41 #include "ihevc_typedefs.h"
42 #include "ihevc_macros.h"
43 #include "ihevc_platform_macros.h"
44 #include "ihevc_defs.h"
45 #include "ihevc_trans_tables.h"
46 #include "ihevc_itrans_recon.h"
47 #include "ihevc_func_selector.h"
48 #include "ihevc_trans_macros.h"
49 
50 /* All the functions here are replicated from ihevc_itrans.c and modified to */
51 /* include reconstruction */
52 
53 /**
54  *******************************************************************************
55  *
56  * @brief
57  *  This function performs Inverse transform type 1 (DST)  and reconstruction
58  * for 4x4 input block
59  *
60  * @par Description:
61  *  Performs inverse transform and adds the prediction  data and clips output
62  * to 8 bit
63  *
64  * @param[in] pi2_src
65  *  Input 4x4 coefficients
66  *
67  * @param[in] pi2_tmp
68  *  Temporary 4x4 buffer for storing inverse
69  *
70  *  transform
71  *  1st stage output
72  *
73  * @param[in] pu1_pred
74  *  Prediction 4x4 block
75  *
76  * @param[out] pu1_dst
77  *  Output 4x4 block
78  *
79  * @param[in] src_strd
80  *  Input stride
81  *
82  * @param[in] pred_strd
83  *  Prediction stride
84  *
85  * @param[in] dst_strd
86  *  Output Stride
87  *
88  * @param[in] zero_cols
89  *  Zero columns in pi2_src
90  *
91  * @returns  Void
92  *
93  * @remarks
94  *  None
95  *
96  *******************************************************************************
97  */
98 
ihevc_itrans_recon_4x4_ttype1(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)99 void ihevc_itrans_recon_4x4_ttype1(WORD16 *pi2_src,
100                                    WORD16 *pi2_tmp,
101                                    UWORD8 *pu1_pred,
102                                    UWORD8 *pu1_dst,
103                                    WORD32 src_strd,
104                                    WORD32 pred_strd,
105                                    WORD32 dst_strd,
106                                    WORD32 zero_cols,
107                                    WORD32 zero_rows)
108 {
109     WORD32 i, c[4];
110     WORD32 add;
111     WORD32 shift;
112     WORD16 *pi2_tmp_orig;
113     WORD32 trans_size;
114     UNUSED(zero_rows);
115     trans_size = TRANS_SIZE_4;
116 
117     pi2_tmp_orig = pi2_tmp;
118 
119     /* Inverse Transform 1st stage */
120     shift = IT_SHIFT_STAGE_1;
121     add = 1 << (shift - 1);
122 
123     for(i = 0; i < trans_size; i++)
124     {
125         /* Checking for Zero Cols */
126         if((zero_cols & 1) == 1)
127         {
128             memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
129         }
130         else
131         {
132             // Intermediate Variables
133             c[0] = pi2_src[0] + pi2_src[2 * src_strd];
134             c[1] = pi2_src[2 * src_strd] + pi2_src[3 * src_strd];
135             c[2] = pi2_src[0] - pi2_src[3 * src_strd];
136             c[3] = 74 * pi2_src[src_strd];
137 
138             pi2_tmp[0] =
139                             CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
140             pi2_tmp[1] =
141                             CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
142             pi2_tmp[2] =
143                             CLIP_S16((74 * (pi2_src[0] - pi2_src[2 * src_strd] + pi2_src[3 * src_strd]) + add) >> shift);
144             pi2_tmp[3] =
145                             CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
146         }
147         pi2_src++;
148         pi2_tmp += trans_size;
149         zero_cols = zero_cols >> 1;
150     }
151 
152     pi2_tmp = pi2_tmp_orig;
153 
154     /* Inverse Transform 2nd stage */
155     shift = IT_SHIFT_STAGE_2;
156     add = 1 << (shift - 1);
157 
158     for(i = 0; i < trans_size; i++)
159     {
160         WORD32 itrans_out;
161         // Intermediate Variables
162         c[0] = pi2_tmp[0] + pi2_tmp[2 * trans_size];
163         c[1] = pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size];
164         c[2] = pi2_tmp[0] - pi2_tmp[3 * trans_size];
165         c[3] = 74 * pi2_tmp[trans_size];
166 
167         itrans_out =
168                         CLIP_S16((29 * c[0] + 55 * c[1] + c[3] + add) >> shift);
169         pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
170         itrans_out =
171                         CLIP_S16((55 * c[2] - 29 * c[1] + c[3] + add) >> shift);
172         pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
173         itrans_out =
174                         CLIP_S16((74 * (pi2_tmp[0] - pi2_tmp[2 * trans_size] + pi2_tmp[3 * trans_size]) + add) >> shift);
175         pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
176         itrans_out =
177                         CLIP_S16((55 * c[0] + 29 * c[2] - c[3] + add) >> shift);
178         pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
179         pi2_tmp++;
180         pu1_pred += pred_strd;
181         pu1_dst += dst_strd;
182     }
183 }
184 
185 /**
186  *******************************************************************************
187  *
188  * @brief
189  *  This function performs Inverse transform  and reconstruction for 4x4
190  * input block
191  *
192  * @par Description:
193  *  Performs inverse transform and adds the prediction  data and clips output
194  * to 8 bit
195  *
196  * @param[in] pi2_src
197  *  Input 4x4 coefficients
198  *
199  * @param[in] pi2_tmp
200  *  Temporary 4x4 buffer for storing inverse
201  *
202  *  transform
203  *  1st stage output
204  *
205  * @param[in] pu1_pred
206  *  Prediction 4x4 block
207  *
208  * @param[out] pu1_dst
209  *  Output 4x4 block
210  *
211  * @param[in] src_strd
212  *  Input stride
213  *
214  * @param[in] pred_strd
215  *  Prediction stride
216  *
217  * @param[in] dst_strd
218  *  Output Stride
219  *
220  * @param[in] shift
221  *  Output shift
222  *
223  * @param[in] zero_cols
224  *  Zero columns in pi2_src
225  *
226  * @returns  Void
227  *
228  * @remarks
229  *  None
230  *
231  *******************************************************************************
232  */
233 
ihevc_itrans_recon_4x4(WORD16 * pi2_src,WORD16 * pi2_tmp,UWORD8 * pu1_pred,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 pred_strd,WORD32 dst_strd,WORD32 zero_cols,WORD32 zero_rows)234 void ihevc_itrans_recon_4x4(WORD16 *pi2_src,
235                             WORD16 *pi2_tmp,
236                             UWORD8 *pu1_pred,
237                             UWORD8 *pu1_dst,
238                             WORD32 src_strd,
239                             WORD32 pred_strd,
240                             WORD32 dst_strd,
241                             WORD32 zero_cols,
242                             WORD32 zero_rows)
243 
244 {
245     WORD32 j;
246     WORD32 e[2], o[2];
247     WORD32 add;
248     WORD32 shift;
249     WORD16 *pi2_tmp_orig;
250     WORD32 trans_size;
251     UNUSED(zero_rows);
252     trans_size = TRANS_SIZE_4;
253 
254     pi2_tmp_orig = pi2_tmp;
255 
256     /* Inverse Transform 1st stage */
257     shift = IT_SHIFT_STAGE_1;
258     add = 1 << (shift - 1);
259 
260     for(j = 0; j < trans_size; j++)
261     {
262         /* Checking for Zero Cols */
263         if((zero_cols & 1) == 1)
264         {
265             memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
266         }
267         else
268         {
269 
270             /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
271             o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_src[src_strd]
272                             + g_ai2_ihevc_trans_4[3][0] * pi2_src[3 * src_strd];
273             o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_src[src_strd]
274                             + g_ai2_ihevc_trans_4[3][1] * pi2_src[3 * src_strd];
275             e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_src[0]
276                             + g_ai2_ihevc_trans_4[2][0] * pi2_src[2 * src_strd];
277             e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_src[0]
278                             + g_ai2_ihevc_trans_4[2][1] * pi2_src[2 * src_strd];
279 
280             pi2_tmp[0] =
281                             CLIP_S16(((e[0] + o[0] + add) >> shift));
282             pi2_tmp[1] =
283                             CLIP_S16(((e[1] + o[1] + add) >> shift));
284             pi2_tmp[2] =
285                             CLIP_S16(((e[1] - o[1] + add) >> shift));
286             pi2_tmp[3] =
287                             CLIP_S16(((e[0] - o[0] + add) >> shift));
288 
289         }
290         pi2_src++;
291         pi2_tmp += trans_size;
292         zero_cols = zero_cols >> 1;
293     }
294 
295     pi2_tmp = pi2_tmp_orig;
296 
297     /* Inverse Transform 2nd stage */
298     shift = IT_SHIFT_STAGE_2;
299     add = 1 << (shift - 1);
300 
301     for(j = 0; j < trans_size; j++)
302     {
303         WORD32 itrans_out;
304         /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
305         o[0] = g_ai2_ihevc_trans_4[1][0] * pi2_tmp[trans_size]
306                         + g_ai2_ihevc_trans_4[3][0] * pi2_tmp[3 * trans_size];
307         o[1] = g_ai2_ihevc_trans_4[1][1] * pi2_tmp[trans_size]
308                         + g_ai2_ihevc_trans_4[3][1] * pi2_tmp[3 * trans_size];
309         e[0] = g_ai2_ihevc_trans_4[0][0] * pi2_tmp[0]
310                         + g_ai2_ihevc_trans_4[2][0] * pi2_tmp[2 * trans_size];
311         e[1] = g_ai2_ihevc_trans_4[0][1] * pi2_tmp[0]
312                         + g_ai2_ihevc_trans_4[2][1] * pi2_tmp[2 * trans_size];
313 
314         itrans_out =
315                         CLIP_S16(((e[0] + o[0] + add) >> shift));
316         pu1_dst[0] = CLIP_U8((itrans_out + pu1_pred[0]));
317         itrans_out =
318                         CLIP_S16(((e[1] + o[1] + add) >> shift));
319         pu1_dst[1] = CLIP_U8((itrans_out + pu1_pred[1]));
320         itrans_out =
321                         CLIP_S16(((e[1] - o[1] + add) >> shift));
322         pu1_dst[2] = CLIP_U8((itrans_out + pu1_pred[2]));
323         itrans_out =
324                         CLIP_S16(((e[0] - o[0] + add) >> shift));
325         pu1_dst[3] = CLIP_U8((itrans_out + pu1_pred[3]));
326 
327         pi2_tmp++;
328         pu1_pred += pred_strd;
329         pu1_dst += dst_strd;
330 
331     }
332 }
333 
334