1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21  *******************************************************************************
22  * @file
23  *  ih264_iquant_itrans_recon.c
24  *
25  * @brief
26  *  Contains definition of functions for h264 inverse quantization inverse transformation and recon
27  *
28  * @author
29  *  Ittiam
30  *
31  *  @par List of Functions:
32  *  - ih264_iquant_itrans_recon_4x4()
33  *  - ih264_iquant_itrans_recon_8x8()
34  *  - ih264_iquant_itrans_recon_4x4_dc()
35  *  - ih264_iquant_itrans_recon_8x8_dc()
36  *  - ih264_iquant_itrans_recon_chroma_4x4()
37  *  - ih264_iquant_itrans_recon_chroma_4x4_dc()
38  *
39  * @remarks
40  *
41  *******************************************************************************
42  */
43 
44 /*****************************************************************************/
45 /* File Includes                                                             */
46 /*****************************************************************************/
47 
48 /* User include files */
49 #include "ih264_typedefs.h"
50 #include "ih264_defs.h"
51 #include "ih264_trans_macros.h"
52 #include "ih264_macros.h"
53 #include "ih264_platform_macros.h"
54 #include "ih264_trans_data.h"
55 #include "ih264_size_defs.h"
56 #include "ih264_structs.h"
57 #include "ih264_trans_quant_itrans_iquant.h"
58 
59 /*
60  ********************************************************************************
61  *
62  * @brief This function reconstructs a 4x4 sub block from quantized residue and
63  * prediction buffer
64  *
65  * @par Description:
66  *  The quantized residue is first inverse quantized, then inverse transformed.
67  *  This inverse transformed content is added to the prediction buffer to recon-
68  *  struct the end output
69  *
70  * @param[in] pi2_src
71  *  quantized 4x4 block
72  *
73  * @param[in] pu1_pred
74  *  prediction 4x4 block
75  *
76  * @param[out] pu1_out
77  *  reconstructed 4x4 block
78  *
79  * @param[in] src_strd
80  *  quantization buffer stride
81  *
82  * @param[in] pred_strd,
83  *  Prediction buffer stride
84  *
85  * @param[in] out_strd
86  *  recon buffer Stride
87  *
88  * @param[in] pu2_scaling_list
89  *  pointer to scaling list
90  *
91  * @param[in] pu2_norm_adjust
92  *  pointer to inverse scale matrix
93  *
94  * @param[in] u4_qp_div_6
95  *  Floor (qp/6)
96  *
97  * @param[in] pi2_tmp
98  *  temporary buffer of 16 WORD16 entries (receives the row-pass output)
99  *
100  * @returns none
101  *
102  * @remarks none
103  *
104  *******************************************************************************
105  */
void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
                                   UWORD8 *pu1_pred,
                                   UWORD8 *pu1_out,
                                   WORD32 pred_strd,
                                   WORD32 out_strd,
                                   const UWORD16 *pu2_iscal_mat,
                                   const UWORD16 *pu2_weigh_mat,
                                   UWORD32 u4_qp_div_6,
                                   WORD16 *pi2_tmp,
                                   WORD32 iq_start_idx,
                                   WORD16 *pi2_dc_ld_addr)
{
    /*
     * Implementation notes:
     *  - Pass 1 dequantizes each row of pi2_src with INV_QUANT, applies the
     *    4-point butterfly and stores the row in pi2_tmp (16 entries).
     *  - Pass 2 applies the same butterfly down each column of pi2_tmp,
     *    rounds with (x + 32) >> 6, adds the prediction sample and writes
     *    the CLIP_U8'd value to pu1_out.
     *  - When iq_start_idx is 1, coefficient 0 is taken verbatim from
     *    pi2_dc_ld_addr[0] instead of the dequantized pi2_src[0].
     *  - Intermediates are deliberately held in WORD16, exactly as the
     *    narrowing stores of the row pass require.
     */

    /* Rounding term handed to INV_QUANT; it is 0 once u4_qp_div_6 reaches 4 */
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    WORD32 c0, c1, c2, c3;
    WORD16 even_sum, even_diff, odd_diff, odd_sum;
    WORD16 ai2_res[4];
    WORD16 i2_pel;
    WORD32 row, col, k;

    /* Pass 1: inverse quantization + horizontal (row) transform */
    for(row = 0; row < SUB_BLK_WIDTH_4x4; row++)
    {
        const WORD16 *pi2_coef = pi2_src + row * SUB_BLK_WIDTH_4x4;
        const UWORD16 *pu2_iscal = pu2_iscal_mat + row * SUB_BLK_WIDTH_4x4;
        const UWORD16 *pu2_weigh = pu2_weigh_mat + row * SUB_BLK_WIDTH_4x4;
        WORD16 *pi2_row_out = pi2_tmp + row * SUB_BLK_WIDTH_4x4;

        c0 = pi2_coef[0];
        INV_QUANT(c0, pu2_iscal[0], pu2_weigh[0], u4_qp_div_6, rnd_fact, 4);
        c1 = pi2_coef[1];
        INV_QUANT(c1, pu2_iscal[1], pu2_weigh[1], u4_qp_div_6, rnd_fact, 4);
        c2 = pi2_coef[2];
        INV_QUANT(c2, pu2_iscal[2], pu2_weigh[2], u4_qp_div_6, rnd_fact, 4);
        c3 = pi2_coef[3];
        INV_QUANT(c3, pu2_iscal[3], pu2_weigh[3], u4_qp_div_6, rnd_fact, 4);

        /* DC term supplied separately by the caller (restored for intra) */
        if((0 == row) && (1 == iq_start_idx))
        {
            c0 = pi2_dc_ld_addr[0];
        }

        even_sum = c0 + c2;
        even_diff = c0 - c2;
        odd_diff = (c1 >> 1) - c3;
        odd_sum = c1 + (c3 >> 1);

        pi2_row_out[0] = even_sum + odd_sum;
        pi2_row_out[1] = even_diff + odd_diff;
        pi2_row_out[2] = even_diff - odd_diff;
        pi2_row_out[3] = even_sum - odd_sum;
    }

    /* Pass 2: vertical (column) transform, rounding and reconstruction */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const WORD16 *pi2_col = pi2_tmp + col;
        const UWORD8 *pu1_pred_col = pu1_pred + col;
        UWORD8 *pu1_out_col = pu1_out + col;

        /* Column elements sit 4 entries apart in the row-major tmp buffer */
        even_sum = pi2_col[0] + pi2_col[8];
        even_diff = pi2_col[0] - pi2_col[8];
        odd_diff = (pi2_col[4] >> 1) - pi2_col[12];
        odd_sum = pi2_col[4] + (pi2_col[12] >> 1);

        ai2_res[0] = even_sum + odd_sum;
        ai2_res[1] = even_diff + odd_diff;
        ai2_res[2] = even_diff - odd_diff;
        ai2_res[3] = even_sum - odd_sum;

        /* Add prediction row by row (read pred, then write out) */
        for(k = 0; k < 4; k++)
        {
            i2_pel = ((ai2_res[k] + 32) >> 6) + pu1_pred_col[k * pred_strd];
            pu1_out_col[k * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
212 
/*
 * Reconstructs a 4x4 block whose residue consists of a DC term only.
 *
 * The DC value is either dequantized from pi2_src[0] (iq_start_idx == 0) or
 * read as-is from pi2_dc_ld_addr[0] (any other iq_start_idx). It is rounded
 * with (dc + 32) >> 6, added to each of the 16 prediction samples and the
 * CLIP_U8'd sum is stored to pu1_out. pi2_tmp is not used.
 */
void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
                                      UWORD8 *pu1_pred,
                                      UWORD8 *pu1_out,
                                      WORD32 pred_strd,
                                      WORD32 out_strd,
                                      const UWORD16 *pu2_iscal_mat,
                                      const UWORD16 *pu2_weigh_mat,
                                      UWORD32 u4_qp_div_6,
                                      WORD16 *pi2_tmp,
                                      WORD32 iq_start_idx,
                                      WORD16 *pi2_dc_ld_addr)
{
    /* Rounding term handed to INV_QUANT; it is 0 once u4_qp_div_6 reaches 4 */
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    WORD32 i4_dc;
    WORD16 i2_dc_res, i2_pel;
    WORD32 row, col;
    UNUSED(pi2_tmp);

    if(0 != iq_start_idx)
    {
        /* DC value supplied separately by the caller (restored for intra) */
        i4_dc = pi2_dc_ld_addr[0];
    }
    else
    {
        i4_dc = pi2_src[0];
        INV_QUANT(i4_dc, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
    }

    /* Same residue for every sample of the block */
    i2_dc_res = ((i4_dc + 32) >> 6);

    /* Walk the block column by column, top to bottom */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const UWORD8 *pu1_pred_col = pu1_pred + col;
        UWORD8 *pu1_out_col = pu1_out + col;

        for(row = 0; row < 4; row++)
        {
            i2_pel = i2_dc_res + pu1_pred_col[row * pred_strd];
            pu1_out_col[row * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
271 
272 /**
273  *******************************************************************************
274  *
275  * @brief
276  *  This function performs inverse quant and Inverse transform type Ci8 for 8x8 block
277  *
278  * @par Description:
279  *  Performs inverse transform Ci8 and adds the residue to get the
280  *  reconstructed block
281  *
282  * @param[in] pi2_src
283  *  Input 8x8coefficients
284  *
285  * @param[in] pu1_pred
286  *  Prediction 8x8 block
287  *
288  * @param[out] pu1_recon
289  *  Output 8x8 block
290  *
291  * @param[in] q_div
292  *  QP/6
293  *
294  * @param[in] q_rem
295  *  QP%6
296  *
297  * @param[in] q_lev
298  *  Quantizer level
299  *
300  * @param[in] src_strd
301  *  Input stride
302  *
303  * @param[in] pred_strd,
304  *  Prediction stride
305  *
306  * @param[in] out_strd
307  *  Output Stride
308  *
309  * @param[in] pi2_tmp
310  *  temporary buffer of 64 WORD16 entries; it receives all dequantized 8x8
311  *  coefficients and is then transformed in place
312  *
313  * @param[in] pu4_iquant_mat
314  *  Pointer to the inverse quantization matrix
315  *
316  * @returns  Void
317  *
318  * @remarks
319  *  None
320  *
321  *******************************************************************************
322  */
/*
 * One 8-point pass of the 8x8 inverse transform.
 *
 * Reads the eight inputs w0..w7 from pi2_w[0], pi2_w[i4_step], ...,
 * pi2_w[7 * i4_step] and writes the eight outputs x0..x7 (as 32-bit sums of
 * the 16-bit z terms) to pi4_x[0..7]. All inputs are read before any output
 * is written, so the caller may store the results back over the inputs.
 * The y and z stages are held in WORD16 on purpose: the narrowing is part of
 * the arithmetic this file has always performed.
 */
static void ih264_itrans_8pt_pass(const WORD16 *pi2_w,
                                  WORD32 i4_step,
                                  WORD32 *pi4_x)
{
    const WORD16 w0 = pi2_w[0];
    const WORD16 w1 = pi2_w[1 * i4_step];
    const WORD16 w2 = pi2_w[2 * i4_step];
    const WORD16 w3 = pi2_w[3 * i4_step];
    const WORD16 w4 = pi2_w[4 * i4_step];
    const WORD16 w5 = pi2_w[5 * i4_step];
    const WORD16 w6 = pi2_w[6 * i4_step];
    const WORD16 w7 = pi2_w[7 * i4_step];
    WORD16 y0, y1, y2, y3, y4, y5, y6, y7;
    WORD16 z0, z1, z2, z3, z4, z5, z6, z7;

    /* Stage 1: y terms */
    y0 = w0 + w4;
    y1 = (WORD32)(-w3) + w5 - w7 - (w7 >> 1);
    y2 = w0 - w4;
    y3 = (WORD32)w1 + w7 - w3 - (w3 >> 1);
    y4 = (w2 >> 1) - w6;
    y5 = (WORD32)(-w1) + w7 + w5 + (w5 >> 1);
    y6 = w2 + (w6 >> 1);
    y7 = (WORD32)w3 + w5 + w1 + (w1 >> 1);

    /* Stage 2: z terms */
    z0 = y0 + y6;
    z1 = y1 + (y7 >> 2);
    z2 = y2 + y4;
    z3 = y3 + (y5 >> 2);
    z4 = y2 - y4;
    z5 = (y3 >> 2) - y5;
    z6 = y0 - y6;
    z7 = y7 - (y1 >> 2);

    /* Stage 3: x terms, left in int precision for the caller */
    pi4_x[0] = z0 + z7;
    pi4_x[1] = z2 + z5;
    pi4_x[2] = z4 + z3;
    pi4_x[3] = z6 + z1;
    pi4_x[4] = z6 - z1;
    pi4_x[5] = z4 - z3;
    pi4_x[6] = z2 - z5;
    pi4_x[7] = z0 - z7;
}

void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
                                   UWORD8 *pu1_pred,
                                   UWORD8 *pu1_out,
                                   WORD32 pred_strd,
                                   WORD32 out_strd,
                                   const UWORD16 *pu2_iscale_mat,
                                   const UWORD16 *pu2_weigh_mat,
                                   UWORD32 qp_div,
                                   WORD16 *pi2_tmp,
                                   WORD32 iq_start_idx,
                                   WORD16 *pi2_dc_ld_addr)
{
    /* Rounding term handed to INV_QUANT; it is 0 once qp_div reaches 6 */
    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
    WORD32 ai4_x[8];
    WORD32 i4_coef;
    WORD16 i2_pel;
    WORD32 idx, row, col, k;
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    /*
     * Dequantization: every one of the 64 coefficients (the DC term
     * included) goes through INV_QUANT and is stored, narrowed to WORD16,
     * in pi2_tmp. pi2_tmp therefore has to hold 64 entries.
     */
    for(idx = 0; idx < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); idx++)
    {
        i4_coef = pi2_src[idx];
        INV_QUANT(i4_coef, pu2_iscale_mat[idx], pu2_weigh_mat[idx], qp_div, rnd_fact, 6);
        pi2_tmp[idx] = i4_coef;
    }

    /* Horizontal pass: transform each row of pi2_tmp in place */
    for(row = 0; row < SUB_BLK_WIDTH_8x8; row++)
    {
        WORD16 *pi2_row = pi2_tmp + row * SUB_BLK_WIDTH_8x8;

        ih264_itrans_8pt_pass(pi2_row, 1, ai4_x);
        for(k = 0; k < 8; k++)
        {
            /* Narrowing store back to the 16-bit scratch buffer */
            pi2_row[k] = ai4_x[k];
        }
    }

    /*
     * Vertical pass: transform each column (elements 8 entries apart),
     * round with (x + 32) >> 6, add the prediction and store the clipped
     * sample. Rows are handled top to bottom, reading pred before writing
     * out for each row.
     */
    for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
    {
        const UWORD8 *pu1_pred_col = pu1_pred + col;
        UWORD8 *pu1_out_col = pu1_out + col;

        ih264_itrans_8pt_pass(pi2_tmp + col, 8, ai4_x);
        for(k = 0; k < 8; k++)
        {
            i2_pel = ((ai4_x[k] + 32) >> 6) + pu1_pred_col[k * pred_strd];
            pu1_out_col[k * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
550 
/*
 * Reconstructs an 8x8 block whose residue consists of a DC term only.
 *
 * pi2_src[0] is dequantized with INV_QUANT, rounded with (dc + 32) >> 6 and
 * the resulting value is added to each of the 64 prediction samples; the
 * CLIP_U8'd sums are stored to pu1_out. No transform pass is run, and
 * pi2_tmp, iq_start_idx and pi2_dc_ld_addr are not used.
 */
void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
                                      UWORD8 *pu1_pred,
                                      UWORD8 *pu1_out,
                                      WORD32 pred_strd,
                                      WORD32 out_strd,
                                      const UWORD16 *pu2_iscale_mat,
                                      const UWORD16 *pu2_weigh_mat,
                                      UWORD32 qp_div,
                                      WORD16 *pi2_tmp,
                                      WORD32 iq_start_idx,
                                      WORD16 *pi2_dc_ld_addr)
{
    /* Rounding term handed to INV_QUANT; it is 0 once qp_div reaches 6 */
    WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
    WORD32 i4_dc;
    WORD16 i2_dc_res, i2_pel;
    WORD32 row, col;
    UNUSED(pi2_tmp);
    UNUSED(iq_start_idx);
    UNUSED(pi2_dc_ld_addr);

    /* Dequantize the single coefficient that is present */
    i4_dc = pi2_src[0];
    INV_QUANT(i4_dc, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);

    /* Same residue for every sample of the block */
    i2_dc_res = (i4_dc + 32) >> 6;

    /* Walk the block column by column, top to bottom */
    for(col = 0; col < SUB_BLK_WIDTH_8x8; col++)
    {
        const UWORD8 *pu1_pred_col = pu1_pred + col;
        UWORD8 *pu1_out_col = pu1_out + col;

        for(row = 0; row < 8; row++)
        {
            i2_pel = i2_dc_res + pu1_pred_col[row * pred_strd];
            pu1_out_col[row * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
636 
637 /*
638  ********************************************************************************
639  *
640  * @brief This function reconstructs a 4x4 sub block from quantized residue and
641  * prediction buffer
642  *
643  * @par Description:
644  *  The quantized residue is first inverse quantized, then inverse transformed.
645  *  This inverse transformed content is added to the prediction buffer to recon-
646  *  struct the end output
647  *
648  * @param[in] pi2_src
649  *  quantized 4x4 block
650  *
651  * @param[in] pu1_pred
652  *  prediction 4x4 block
653  *
654  * @param[out] pu1_out
655  *  reconstructed 4x4 block
656  *
657  * @param[in] src_strd
658  *  quantization buffer stride
659  *
660  * @param[in] pred_strd,
661  *  Prediction buffer stride
662  *
663  * @param[in] out_strd
664  *  recon buffer Stride
665  *
666  * @param[in] pu2_scaling_list
667  *  pointer to scaling list
668  *
669  * @param[in] pu2_norm_adjust
670  *  pointer to inverse scale matrix
671  *
672  * @param[in] u4_qp_div_6
673  *  Floor (qp/6)
674  *
675  * @param[in] pi2_tmp
676  *  temporary buffer of 16 WORD16 entries (receives the row-pass output)
677  *
678  * @returns none
679  *
680  * @remarks none
681  *
682  *******************************************************************************
683  */
void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
                                          UWORD8 *pu1_pred,
                                          UWORD8 *pu1_out,
                                          WORD32 pred_strd,
                                          WORD32 out_strd,
                                          const UWORD16 *pu2_iscal_mat,
                                          const UWORD16 *pu2_weigh_mat,
                                          UWORD32 u4_qp_div_6,
                                          WORD16 *pi2_tmp,
                                          WORD16 *pi2_dc_src)
{
    /*
     * Implementation notes:
     *  - Pass 1 dequantizes each row of pi2_src with INV_QUANT, applies the
     *    4-point butterfly and stores the row in pi2_tmp (16 entries).
     *    Coefficient 0 of the block is never dequantized here: it is always
     *    taken verbatim from pi2_dc_src[0].
     *  - Pass 2 applies the butterfly down each column, rounds with
     *    (x + 32) >> 6, adds the prediction sample and stores the CLIP_U8'd
     *    value.
     *  - Prediction and output samples of one row are 2 bytes apart
     *    (interleaved layout), so every column advances both pointers by 2.
     *  - Intermediates are deliberately held in WORD16.
     */

    /* Rounding term handed to INV_QUANT; it is 0 once u4_qp_div_6 reaches 4 */
    WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    WORD32 c0, c1, c2, c3;
    WORD16 even_sum, even_diff, odd_diff, odd_sum;
    WORD16 ai2_res[4];
    WORD16 i2_pel;
    WORD32 row, col, k;

    /* Pass 1: inverse quantization + horizontal (row) transform */
    for(row = 0; row < SUB_BLK_WIDTH_4x4; row++)
    {
        const WORD16 *pi2_coef = pi2_src + row * SUB_BLK_WIDTH_4x4;
        const UWORD16 *pu2_iscal = pu2_iscal_mat + row * SUB_BLK_WIDTH_4x4;
        const UWORD16 *pu2_weigh = pu2_weigh_mat + row * SUB_BLK_WIDTH_4x4;
        WORD16 *pi2_row_out = pi2_tmp + row * SUB_BLK_WIDTH_4x4;

        if(0 != row)
        {
            c0 = pi2_coef[0];
            INV_QUANT(c0, pu2_iscal[0], pu2_weigh[0], u4_qp_div_6, rnd_fact, 4);
        }
        else
        {
            /* DC term comes from the separate DC buffer, used as-is */
            c0 = pi2_dc_src[0];
        }

        c1 = pi2_coef[1];
        INV_QUANT(c1, pu2_iscal[1], pu2_weigh[1], u4_qp_div_6, rnd_fact, 4);
        c2 = pi2_coef[2];
        INV_QUANT(c2, pu2_iscal[2], pu2_weigh[2], u4_qp_div_6, rnd_fact, 4);
        c3 = pi2_coef[3];
        INV_QUANT(c3, pu2_iscal[3], pu2_weigh[3], u4_qp_div_6, rnd_fact, 4);

        even_sum = c0 + c2;
        even_diff = c0 - c2;
        odd_diff = (c1 >> 1) - c3;
        odd_sum = c1 + (c3 >> 1);

        pi2_row_out[0] = even_sum + odd_sum;
        pi2_row_out[1] = even_diff + odd_diff;
        pi2_row_out[2] = even_diff - odd_diff;
        pi2_row_out[3] = even_sum - odd_sum;
    }

    /* Pass 2: vertical (column) transform, rounding and reconstruction */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const WORD16 *pi2_col = pi2_tmp + col;
        /* Interleaved load/store: neighbouring samples are 2 bytes apart */
        const UWORD8 *pu1_pred_col = pu1_pred + 2 * col;
        UWORD8 *pu1_out_col = pu1_out + 2 * col;

        /* Column elements sit 4 entries apart in the row-major tmp buffer */
        even_sum = pi2_col[0] + pi2_col[8];
        even_diff = pi2_col[0] - pi2_col[8];
        odd_diff = (pi2_col[4] >> 1) - pi2_col[12];
        odd_sum = pi2_col[4] + (pi2_col[12] >> 1);

        ai2_res[0] = even_sum + odd_sum;
        ai2_res[1] = even_diff + odd_diff;
        ai2_res[2] = even_diff - odd_diff;
        ai2_res[3] = even_sum - odd_sum;

        /* Add prediction row by row (read pred, then write out) */
        for(k = 0; k < 4; k++)
        {
            i2_pel = ((ai2_res[k] + 32) >> 6) + pu1_pred_col[k * pred_strd];
            pu1_out_col[k * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
791 
792 /*
793  ********************************************************************************
794  *
795  * @brief This function reconstructs a 4x4 sub block from quantized residue and
796  * prediction buffer if only dc value is present for residue
797  *
798  * @par Description:
799  *  The quantized residue is first inverse quantized,
800  *  This inverse quantized content is added to the prediction buffer to recon-
801  *  struct the end output
802  *
803  * @param[in] pi2_src
804  *  quantized dc coefficient
805  *
806  * @param[in] pu1_pred
807  *  prediction 4x4 block in interleaved format
808  *
809  * @param[in] pred_strd,
810  *  Prediction buffer stride in interleaved format
811  *
812  * @param[in] out_strd
813  *  recon buffer Stride
814  *
815  * @returns none
816  *
817  * @remarks none
818  *
819  *******************************************************************************
820  */
821 
void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
                                             UWORD8 *pu1_pred,
                                             UWORD8 *pu1_out,
                                             WORD32 pred_strd,
                                             WORD32 out_strd,
                                             const UWORD16 *pu2_iscal_mat,
                                             const UWORD16 *pu2_weigh_mat,
                                             UWORD32 u4_qp_div_6,
                                             WORD16 *pi2_tmp,
                                             WORD16 *pi2_dc_src)
{
    /*
     * Implementation notes:
     *  - Only pi2_dc_src[0] contributes to the residue; it is used as-is
     *    (no INV_QUANT here) and rounded with (dc + 32) >> 6.
     *  - pi2_src, both matrices, u4_qp_div_6 and pi2_tmp are not used.
     *  - Prediction and output samples of one row are 2 bytes apart
     *    (interleaved layout), so every column advances both pointers by 2.
     */
    WORD32 i4_dc;
    WORD16 i2_dc_res, i2_pel;
    WORD32 row, col;
    UNUSED(pi2_src);
    UNUSED(pu2_iscal_mat);
    UNUSED(pu2_weigh_mat);
    UNUSED(u4_qp_div_6);
    UNUSED(pi2_tmp);

    i4_dc = pi2_dc_src[0];

    /* Same residue for every sample of the block */
    i2_dc_res = ((i4_dc + 32) >> 6);

    /* Walk the block column by column, top to bottom */
    for(col = 0; col < SUB_BLK_WIDTH_4x4; col++)
    {
        const UWORD8 *pu1_pred_col = pu1_pred + 2 * col;
        UWORD8 *pu1_out_col = pu1_out + 2 * col;

        for(row = 0; row < 4; row++)
        {
            i2_pel = i2_dc_res + pu1_pred_col[row * pred_strd];
            pu1_out_col[row * out_strd] = CLIP_U8(i2_pel);
        }
    }
}
874