1 /******************************************************************************
2 *
3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 ******************************************************************************/
/**
 *******************************************************************************
 * @file
 *  ihevc_inter_pred.c
 *
 * @brief
 *  Calculates the prediction samples for a given CTB
 *
 * @author
 *  Srinivas T
 *
 * @par List of Functions:
 *   - ihevcd_inter_pred_ctb()
 *
 * @remarks
 *  None
 *
 *******************************************************************************
 */
37 #include <stdio.h>
38 #include <stddef.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <assert.h>
42 
43 #include "ihevc_typedefs.h"
44 #include "iv.h"
45 #include "ivd.h"
46 #include "ihevcd_cxa.h"
47 #include "ithread.h"
48 
49 #include "ihevc_defs.h"
50 #include "ihevc_debug.h"
51 #include "ihevc_structs.h"
52 #include "ihevc_macros.h"
53 #include "ihevc_platform_macros.h"
54 #include "ihevc_cabac_tables.h"
55 #include "ihevc_weighted_pred.h"
56 
57 #include "ihevc_error.h"
58 #include "ihevc_common_tables.h"
59 
60 #include "ihevcd_trace.h"
61 #include "ihevcd_defs.h"
62 #include "ihevcd_function_selector.h"
63 #include "ihevcd_structs.h"
64 #include "ihevcd_error.h"
65 #include "ihevcd_nal.h"
66 #include "ihevcd_bitstream.h"
67 #include "ihevcd_job_queue.h"
68 #include "ihevcd_utils.h"
69 
70 #include "ihevc_inter_pred.h"
71 #include "ihevcd_profile.h"
72 
73 static WORD8 gai1_luma_filter[4][NTAPS_LUMA] =
74 {
75     { 0, 0, 0, 64, 0, 0, 0, 0 },
76     { -1, 4, -10, 58, 17, -5, 1, 0 },
77     { -1, 4, -11, 40, 40, -11, 4, -1 },
78     { 0, 1, -5, 17, 58, -10, 4, -1 } };
79 
80 /* The filter uses only the first four elements in each array */
81 static WORD8 gai1_chroma_filter[8][NTAPS_LUMA] =
82 {
83     { 0, 64, 0, 0, 0, 0, 0, 0 },
84     { -2, 58, 10, -2, 0, 0, 0, 0 },
85     { -4, 54, 16, -2, 0, 0, 0, 0 },
86     { -6, 46, 28, -4, 0, 0, 0, 0 },
87     { -4, 36, 36, -4, 0, 0, 0, 0 },
88     { -4, 28, 46, -6, 0, 0, 0, 0 },
89     { -2, 16, 54, -4, 0, 0, 0, 0 },
90     { -2, 10, 58, -2, 0, 0, 0, 0 } };
91 
92 /**
93 *******************************************************************************
94 *
95 * @brief
96 *  Inter prediction CTB level function
97 *
98 * @par Description:
99 *  For a given CTB, Inter prediction followed by weighted  prediction is
100 * done for all the PUs present in the CTB
101 *
102 * @param[in] ps_ctb
103 *  Pointer to the CTB context
104 *
105 * @returns
106 *
107 * @remarks
108 *
109 *
110 *******************************************************************************
111 */
112 
ihevcd_inter_pred_ctb(process_ctxt_t * ps_proc)113 void ihevcd_inter_pred_ctb(process_ctxt_t *ps_proc)
114 {
115     UWORD8 *ref_pic_luma_l0, *ref_pic_chroma_l0;
116     UWORD8 *ref_pic_luma_l1, *ref_pic_chroma_l1;
117 
118     UWORD8 *ref_pic_l0 = NULL, *ref_pic_l1 = NULL;
119 
120     slice_header_t *ps_slice_hdr;
121     sps_t *ps_sps;
122     pps_t *ps_pps;
123     pu_t *ps_pu;
124     codec_t *ps_codec;
125     WORD32 pu_indx;
126     WORD32 pu_x, pu_y;
127     WORD32 pu_wd, pu_ht;
128     WORD32 i4_pu_cnt;
129     WORD32 cur_ctb_idx;
130 
131     WORD32 clr_indx;
132     WORD32 ntaps;
133 
134 
135 
136     WORD32 ai2_xint[2] = { 0, 0 }, ai2_yint[2] = { 0, 0 };
137     WORD32 ai2_xfrac[2] = { 0, 0 }, ai2_yfrac[2] = { 0, 0 };
138 
139     WORD32 weighted_pred, bi_pred;
140 
141     WORD32 ref_strd;
142     UWORD8 *pu1_dst_luma, *pu1_dst_chroma;
143 
144     UWORD8 *pu1_dst;
145 
146     WORD16 *pi2_tmp1, *pi2_tmp2;
147 
148     WORD32 luma_weight_l0, luma_weight_l1;
149     WORD32 chroma_weight_l0_cb, chroma_weight_l1_cb, chroma_weight_l0_cr, chroma_weight_l1_cr;
150     WORD32 luma_offset_l0, luma_offset_l1;
151     WORD32 chroma_offset_l0_cb, chroma_offset_l1_cb, chroma_offset_l0_cr, chroma_offset_l1_cr;
152     WORD32 shift, lvl_shift1, lvl_shift2;
153 
154     pf_inter_pred func_ptr1, func_ptr2, func_ptr3, func_ptr4;
155     WORD32 func_indx1, func_indx2, func_indx3, func_indx4;
156     void *func_src;
157     void *func_dst;
158     WORD32 func_src_strd;
159     WORD32 func_dst_strd;
160     WORD8 *func_coeff;
161     WORD32 func_wd;
162     WORD32 func_ht;
163     WORD32 next_ctb_idx;
164     WORD8(*coeff)[8];
165     WORD32  chroma_yuv420sp_vu;
166 
167     PROFILE_DISABLE_INTER_PRED();
168     ps_codec = ps_proc->ps_codec;
169     ps_slice_hdr = ps_proc->ps_slice_hdr;
170     ps_pps = ps_proc->ps_pps;
171     ps_sps = ps_proc->ps_sps;
172     cur_ctb_idx = ps_proc->i4_ctb_x
173                     + ps_proc->i4_ctb_y * (ps_sps->i2_pic_wd_in_ctb);
174     /*
175      * In case of tiles, the next ctb belonging to the same tile must be used to get the PU index
176      */
177 
178     next_ctb_idx = ps_proc->i4_next_pu_ctb_cnt;
179     i4_pu_cnt = ps_proc->pu4_pic_pu_idx[next_ctb_idx] - ps_proc->pu4_pic_pu_idx[cur_ctb_idx];
180 
181     ps_pu = ps_proc->ps_pu;
182     ref_strd = ps_codec->i4_strd;
183     pi2_tmp1 = ps_proc->pi2_inter_pred_tmp_buf1;
184     pi2_tmp2 = ps_proc->pi2_inter_pred_tmp_buf2;
185     pu1_dst_luma = ps_proc->pu1_cur_pic_luma;
186     pu1_dst_chroma = ps_proc->pu1_cur_pic_chroma;
187 
188     chroma_yuv420sp_vu = (ps_codec->e_ref_chroma_fmt == IV_YUV_420SP_VU);
189 
190     ASSERT(PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type);
191 
192     ref_pic_luma_l0 = NULL;
193     ref_pic_chroma_l0 = NULL;
194 
195     luma_weight_l0 = 0;
196     chroma_weight_l0_cb = 0;
197     chroma_weight_l0_cr = 0;
198 
199     luma_offset_l0 = 0;
200     chroma_offset_l0_cb = 0;
201     chroma_offset_l0_cr = 0;
202 
203     ref_pic_luma_l1 = NULL;
204     ref_pic_chroma_l1 = NULL;
205 
206     luma_weight_l1 = 0;
207     chroma_weight_l1_cb = 0;
208     chroma_weight_l1_cr = 0;
209 
210     luma_offset_l1 = 0;
211     chroma_offset_l1_cb = 0;
212     chroma_offset_l1_cr = 0;
213 
214     for(pu_indx = 0; pu_indx < i4_pu_cnt; pu_indx++, ps_pu++)
215     {
216         /* If the PU is intra then proceed to the next */
217         if(1 == ps_pu->b1_intra_flag)
218             continue;
219         pu_x = (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_x << 2);
220         pu_y = (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size) + (ps_pu->b4_pos_y << 2);
221 
222         pu_wd = (ps_pu->b4_wd + 1) << 2;
223         pu_ht = (ps_pu->b4_ht + 1) << 2;
224 
225         weighted_pred = (ps_slice_hdr->i1_slice_type == PSLICE) ? ps_pps->i1_weighted_pred_flag :
226                         ps_pps->i1_weighted_bipred_flag;
227         bi_pred = (ps_pu->b2_pred_mode == PRED_BI);
228 
229         if(ps_pu->b2_pred_mode != PRED_L1)
230         {
231             pic_buf_t *ps_pic_buf_l0;
232 
233             ps_pic_buf_l0 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list0[ps_pu->mv.i1_l0_ref_idx].pv_pic_buf));
234 
235             ref_pic_luma_l0 = ps_pic_buf_l0->pu1_luma;
236             ref_pic_chroma_l0 = ps_pic_buf_l0->pu1_chroma;
237 
238             luma_weight_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[ps_pu->mv.i1_l0_ref_idx];
239             chroma_weight_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cb[ps_pu->mv.i1_l0_ref_idx];
240             chroma_weight_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l0_cr[ps_pu->mv.i1_l0_ref_idx];
241 
242             luma_offset_l0 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[ps_pu->mv.i1_l0_ref_idx];
243             chroma_offset_l0_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cb[ps_pu->mv.i1_l0_ref_idx];
244             chroma_offset_l0_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l0_cr[ps_pu->mv.i1_l0_ref_idx];
245         }
246 
247         if(ps_pu->b2_pred_mode != PRED_L0)
248         {
249             pic_buf_t *ps_pic_buf_l1;
250             ps_pic_buf_l1 = (pic_buf_t *)((ps_slice_hdr->as_ref_pic_list1[ps_pu->mv.i1_l1_ref_idx].pv_pic_buf));
251             ref_pic_luma_l1 = ps_pic_buf_l1->pu1_luma;
252             ref_pic_chroma_l1 = ps_pic_buf_l1->pu1_chroma;
253 
254             luma_weight_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[ps_pu->mv.i1_l1_ref_idx];
255             chroma_weight_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cb[ps_pu->mv.i1_l1_ref_idx];
256             chroma_weight_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_weight_l1_cr[ps_pu->mv.i1_l1_ref_idx];
257 
258             luma_offset_l1 = ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[ps_pu->mv.i1_l1_ref_idx];
259             chroma_offset_l1_cb = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cb[ps_pu->mv.i1_l1_ref_idx];
260             chroma_offset_l1_cr = ps_slice_hdr->s_wt_ofst.i2_chroma_offset_l1_cr[ps_pu->mv.i1_l1_ref_idx];
261         }
262 
263         /*luma and chroma components*/
264         for(clr_indx = 0; clr_indx < 2; clr_indx++)
265         {
266             PROFILE_DISABLE_INTER_PRED_LUMA(clr_indx);
267             PROFILE_DISABLE_INTER_PRED_CHROMA(clr_indx);
268 
269             if(clr_indx == 0)
270             {
271                 WORD32 mv;
272                 if(ps_pu->b2_pred_mode != PRED_L1)
273                 {
274                     mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
275                     ai2_xint[0] = pu_x + (mv >> 2);
276                     ai2_xfrac[0] = mv & 3;
277 
278                     mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
279                     ai2_yint[0] = pu_y + (mv >> 2);
280                     ai2_yfrac[0] = mv & 3;
281 
282                     ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
283                     ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
284 
285 
286                     ref_pic_l0 = ref_pic_luma_l0 + ai2_yint[0] * ref_strd
287                                     + ai2_xint[0];
288                 }
289 
290                 if(ps_pu->b2_pred_mode != PRED_L0)
291                 {
292 
293                     mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
294                     ai2_xint[1] = pu_x + (mv >> 2);
295                     ai2_xfrac[1] = mv & 3;
296 
297                     mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
298                     ai2_yint[1] = pu_y + (mv >> 2);
299                     ai2_yfrac[1] = mv & 3;
300 
301                     ref_pic_l1 = ref_pic_luma_l1 + ai2_yint[1] * ref_strd
302                                     + ai2_xint[1];
303                     ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
304                     ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
305 
306                 }
307 
308                 pu1_dst = pu1_dst_luma + pu_y * ref_strd + pu_x;
309 
310                 ntaps = NTAPS_LUMA;
311                 coeff = gai1_luma_filter;
312             }
313 
314             else
315             {
316                 WORD32 mv;
317                 /* xint is upshifted by 1 because the chroma components are  */
318                 /* interleaved which is not the assumption made by standard  */
319                 if(ps_pu->b2_pred_mode != PRED_L1)
320                 {
321                     mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
322                     ai2_xint[0] = (pu_x / 2 + (mv >> 3)) << 1;
323                     ai2_xfrac[0] = mv & 7;
324 
325                     mv = CLIP3(ps_pu->mv.s_l0_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
326                     ai2_yint[0] = pu_y / 2 + (mv >> 3);
327                     ai2_yfrac[0] = mv & 7;
328 
329                     ref_pic_l0 = ref_pic_chroma_l0 + ai2_yint[0] * ref_strd
330                                     + ai2_xint[0];
331 
332                     ai2_xfrac[0] &= ps_codec->i4_mv_frac_mask;
333                     ai2_yfrac[0] &= ps_codec->i4_mv_frac_mask;
334 
335                 }
336 
337                 if(ps_pu->b2_pred_mode != PRED_L0)
338                 {
339                     mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvx, (-((MAX_CTB_SIZE + pu_x + 7) << 2)), ((ps_sps->i2_pic_width_in_luma_samples - pu_x + 7) << 2));
340                     ai2_xint[1] = (pu_x / 2 + (mv >> 3)) << 1;
341                     ai2_xfrac[1] = mv & 7;
342 
343                     mv = CLIP3(ps_pu->mv.s_l1_mv.i2_mvy, (-((MAX_CTB_SIZE + pu_y + 7) << 2)), ((ps_sps->i2_pic_height_in_luma_samples - pu_y + 7) << 2));
344                     ai2_yint[1] = pu_y / 2 + (mv >> 3);
345                     ai2_yfrac[1] = mv & 7;
346 
347                     ref_pic_l1 = ref_pic_chroma_l1 + ai2_yint[1] * ref_strd
348                                     + ai2_xint[1];
349                     ai2_xfrac[1] &= ps_codec->i4_mv_frac_mask;
350                     ai2_yfrac[1] &= ps_codec->i4_mv_frac_mask;
351 
352                 }
353 
354                 pu1_dst = pu1_dst_chroma + pu_y * ref_strd / 2 + pu_x;
355 
356                 ntaps = NTAPS_CHROMA;
357                 coeff = gai1_chroma_filter;
358             }
359 
360             if(ps_pu->b2_pred_mode != PRED_L1)
361             {
362                 func_indx1 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
363                 func_indx1 += ai2_xfrac[0] ? 2 : 0;
364                 func_indx1 += ai2_yfrac[0] ? 1 : 0;
365 
366                 func_indx2 = (ai2_xfrac[0] && ai2_yfrac[0])
367                                 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
368 
369                 func_ptr1 = ps_codec->apf_inter_pred[func_indx1];
370                 func_ptr2 = ps_codec->apf_inter_pred[func_indx2];
371             }
372             else
373             {
374                 func_ptr1 = NULL;
375                 func_ptr2 = NULL;
376             }
377             if(ps_pu->b2_pred_mode != PRED_L0)
378             {
379                 func_indx3 = 4 * (weighted_pred || bi_pred) + 1 + 11 * clr_indx;
380                 func_indx3 += ai2_xfrac[1] ? 2 : 0;
381                 func_indx3 += ai2_yfrac[1] ? 1 : 0;
382 
383                 func_indx4 = (ai2_xfrac[1] && ai2_yfrac[1])
384                                 * (9 + (weighted_pred || bi_pred)) + 11 * clr_indx;
385 
386                 func_ptr3 = ps_codec->apf_inter_pred[func_indx3];
387                 func_ptr4 = ps_codec->apf_inter_pred[func_indx4];
388             }
389             else
390             {
391                 func_ptr3 = NULL;
392                 func_ptr4 = NULL;
393             }
394 
395             /*Function 1*/
396             if(func_ptr1 != NULL)
397             {
398                 func_src_strd = ref_strd;
399                 func_src = (ai2_xfrac[0] && ai2_yfrac[0]) ?
400                                 ref_pic_l0 - (ntaps / 2 - 1) * func_src_strd :
401                                 ref_pic_l0;
402                 func_dst = (weighted_pred || bi_pred) ?
403                                 (void *)pi2_tmp1 : (void *)pu1_dst;
404                 if(ai2_xfrac[0] && ai2_yfrac[0])
405                 {
406                     func_dst = pi2_tmp1;
407                 }
408 
409                 func_dst_strd = (weighted_pred || bi_pred
410                                 || (ai2_xfrac[0] && ai2_yfrac[0])) ?
411                                 pu_wd : ref_strd;
412                 func_coeff = ai2_xfrac[0] ?
413                                 coeff[ai2_xfrac[0]] : coeff[ai2_yfrac[0]];
414                 func_wd = pu_wd >> clr_indx;
415                 func_ht = pu_ht >> clr_indx;
416                 func_ht += (ai2_xfrac[0] && ai2_yfrac[0]) ? ntaps - 1 : 0;
417                 func_ptr1(func_src, func_dst, func_src_strd, func_dst_strd,
418                           func_coeff, func_ht, func_wd);
419             }
420 
421             /*Function 2*/
422             if(func_ptr2 != NULL)
423             {
424                 func_src_strd = pu_wd;
425                 func_src = pi2_tmp1 + (ntaps / 2 - 1) * func_src_strd;
426                 func_dst = (weighted_pred || bi_pred) ?
427                                 (void *)pi2_tmp1 : (void *)pu1_dst;
428 
429                 func_dst_strd = (weighted_pred || bi_pred) ?
430                                 pu_wd : ref_strd;
431                 func_coeff = coeff[ai2_yfrac[0]];
432                 func_wd = pu_wd >> clr_indx;
433                 func_ht = pu_ht >> clr_indx;
434                 func_ptr2(func_src, func_dst, func_src_strd, func_dst_strd,
435                           func_coeff, func_ht, func_wd);
436             }
437 
438             if(func_ptr3 != NULL)
439             {
440                 func_src_strd = ref_strd;
441                 func_src = (ai2_xfrac[1] && ai2_yfrac[1]) ?
442                                 ref_pic_l1 - (ntaps / 2 - 1) * func_src_strd :
443                                 ref_pic_l1;
444 
445                 func_dst = (weighted_pred || bi_pred) ?
446                                 (void *)pi2_tmp2 : (void *)pu1_dst;
447                 if(ai2_xfrac[1] && ai2_yfrac[1])
448                 {
449                     func_dst = pi2_tmp2;
450                 }
451                 func_dst_strd = (weighted_pred || bi_pred
452                                 || (ai2_xfrac[1] && ai2_yfrac[1])) ?
453                                 pu_wd : ref_strd;
454                 func_coeff = ai2_xfrac[1] ?
455                                 coeff[ai2_xfrac[1]] : coeff[ai2_yfrac[1]];
456                 func_wd = pu_wd >> clr_indx;
457                 func_ht = pu_ht >> clr_indx;
458                 func_ht += (ai2_xfrac[1] && ai2_yfrac[1]) ? ntaps - 1 : 0;
459                 func_ptr3(func_src, func_dst, func_src_strd, func_dst_strd,
460                           func_coeff, func_ht, func_wd);
461 
462             }
463 
464             if(func_ptr4 != NULL)
465             {
466                 func_src_strd = pu_wd;
467                 func_src = pi2_tmp2 + (ntaps / 2 - 1) * func_src_strd;
468 
469                 func_dst = (weighted_pred || bi_pred) ?
470                                 (void *)pi2_tmp2 : (void *)pu1_dst;
471                 func_dst_strd = (weighted_pred || bi_pred) ?
472                                 pu_wd : ref_strd;
473                 func_coeff = coeff[ai2_yfrac[1]];
474                 func_wd = pu_wd >> clr_indx;
475                 func_ht = pu_ht >> clr_indx;
476                 func_ptr4(func_src, func_dst, func_src_strd, func_dst_strd,
477                           func_coeff, func_ht, func_wd);
478 
479             }
480 
481             PROFILE_DISABLE_INTER_PRED_LUMA_AVERAGING(clr_indx);
482             PROFILE_DISABLE_INTER_PRED_CHROMA_AVERAGING(clr_indx);
483 
484 
485             if((weighted_pred != 0) && (bi_pred != 0))
486             {
487                 lvl_shift1 = 0;
488                 lvl_shift2 = 0;
489                 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
490                     lvl_shift1 = (1 << 13);
491 
492                 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
493                     lvl_shift2 = (1 << 13);
494 
495 
496                 if(0 == clr_indx)
497                 {
498                     shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
499                                     + SHIFT_14_MINUS_BIT_DEPTH + 1;
500 
501                     ps_codec->s_func_selector.ihevc_weighted_pred_bi_fptr(pi2_tmp1,
502                                                                           pi2_tmp2,
503                                                                           pu1_dst,
504                                                                           pu_wd,
505                                                                           pu_wd,
506                                                                           ref_strd,
507                                                                           luma_weight_l0,
508                                                                           luma_offset_l0,
509                                                                           luma_weight_l1,
510                                                                           luma_offset_l1,
511                                                                           shift,
512                                                                           lvl_shift1,
513                                                                           lvl_shift2,
514                                                                           pu_ht,
515                                                                           pu_wd);
516                 }
517                 else
518                 {
519                     shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
520                                     + SHIFT_14_MINUS_BIT_DEPTH + 1;
521 
522                     if(chroma_yuv420sp_vu)
523                     {
524                         ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
525                                                                                      pi2_tmp2,
526                                                                                      pu1_dst,
527                                                                                      pu_wd,
528                                                                                      pu_wd,
529                                                                                      ref_strd,
530                                                                                      chroma_weight_l0_cr,
531                                                                                      chroma_weight_l0_cb,
532                                                                                      chroma_offset_l0_cr,
533                                                                                      chroma_offset_l0_cb,
534                                                                                      chroma_weight_l1_cr,
535                                                                                      chroma_weight_l1_cb,
536                                                                                      chroma_offset_l1_cr,
537                                                                                      chroma_offset_l1_cb,
538                                                                                      shift,
539                                                                                      lvl_shift1,
540                                                                                      lvl_shift2,
541                                                                                      pu_ht >> 1,
542                                                                                      pu_wd >> 1);
543                     }
544                     else
545                     {
546                         ps_codec->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr(pi2_tmp1,
547                                                                                      pi2_tmp2,
548                                                                                      pu1_dst,
549                                                                                      pu_wd,
550                                                                                      pu_wd,
551                                                                                      ref_strd,
552                                                                                      chroma_weight_l0_cb,
553                                                                                      chroma_weight_l0_cr,
554                                                                                      chroma_offset_l0_cb,
555                                                                                      chroma_offset_l0_cr,
556                                                                                      chroma_weight_l1_cb,
557                                                                                      chroma_weight_l1_cr,
558                                                                                      chroma_offset_l1_cb,
559                                                                                      chroma_offset_l1_cr,
560                                                                                      shift,
561                                                                                      lvl_shift1,
562                                                                                      lvl_shift2,
563                                                                                      pu_ht >> 1,
564                                                                                      pu_wd >> 1);
565                     }
566                 }
567             }
568 
569             else if((weighted_pred != 0) && (bi_pred == 0))
570             {
571                 lvl_shift1 = 0;
572                 if(ps_pu->b2_pred_mode == PRED_L0)
573                 {
574                     if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
575                         lvl_shift1 = (1 << 13);
576                 }
577                 else
578                 {
579                     if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
580                         lvl_shift1 = (1 << 13);
581                 }
582 
583                 if(0 == clr_indx)
584                 {
585                     shift = ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom
586                                     + SHIFT_14_MINUS_BIT_DEPTH;
587 
588                     ps_codec->s_func_selector.ihevc_weighted_pred_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
589                                                                            pu1_dst,
590                                                                            pu_wd,
591                                                                            ref_strd,
592                                                                            ps_pu->b2_pred_mode == PRED_L0 ? luma_weight_l0 : luma_weight_l1,
593                                                                            ps_pu->b2_pred_mode == PRED_L0 ? luma_offset_l0 : luma_offset_l1,
594                                                                            shift,
595                                                                            lvl_shift1,
596                                                                            pu_ht,
597                                                                            pu_wd);
598                 }
599                 else
600                 {
601                     shift = ps_slice_hdr->s_wt_ofst.i1_chroma_log2_weight_denom
602                                     + SHIFT_14_MINUS_BIT_DEPTH;
603 
604                     if(chroma_yuv420sp_vu)
605                     {
606                         ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
607                                                                                       pu1_dst,
608                                                                                       pu_wd,
609                                                                                       ref_strd,
610                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
611                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
612                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
613                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
614                                                                                       shift,
615                                                                                       lvl_shift1,
616                                                                                       pu_ht >> 1,
617                                                                                       pu_wd >> 1);
618                     }
619                     else
620                     {
621                         ps_codec->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr(ps_pu->b2_pred_mode == PRED_L0 ? pi2_tmp1 : pi2_tmp2,
622                                                                                       pu1_dst,
623                                                                                       pu_wd,
624                                                                                       ref_strd,
625                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cb : chroma_weight_l1_cb,
626                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_weight_l0_cr : chroma_weight_l1_cr,
627                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cb : chroma_offset_l1_cb,
628                                                                                       ps_pu->b2_pred_mode == PRED_L0 ? chroma_offset_l0_cr : chroma_offset_l1_cr,
629                                                                                       shift,
630                                                                                       lvl_shift1,
631                                                                                       pu_ht >> 1,
632                                                                                       pu_wd >> 1);
633                     }
634                 }
635             }
636 
637             else if((weighted_pred == 0) && (bi_pred != 0))
638             {
639                 lvl_shift1 = 0;
640                 lvl_shift2 = 0;
641                 if((0 == clr_indx) && (ai2_xfrac[0] && ai2_yfrac[0]))
642                     lvl_shift1 = (1 << 13);
643 
644                 if((0 == clr_indx) && (ai2_xfrac[1] && ai2_yfrac[1]))
645                     lvl_shift2 = (1 << 13);
646 
647                 if(clr_indx != 0)
648                 {
649                     pu_ht = (pu_ht >> 1);
650                 }
651                 ps_codec->s_func_selector.ihevc_weighted_pred_bi_default_fptr(pi2_tmp1,
652                                                                               pi2_tmp2,
653                                                                               pu1_dst,
654                                                                               pu_wd,
655                                                                               pu_wd,
656                                                                               ref_strd,
657                                                                               lvl_shift1,
658                                                                               lvl_shift2,
659                                                                               pu_ht,
660                                                                               pu_wd);
661 
662             }
663         }
664     }
665 }
666