1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ihevce_inter_pred.c
25 *
26 * @brief
*  Contains functions for giving out prediction samples for a given pu
28 *
29 * @author
30 *  Ittiam
31 *
32 * @par List of Functions:
33 *   - ihevc_inter_pred()
34 *
35 *
36 *******************************************************************************
37 */
38 /* System include files */
39 #include <stdio.h>
40 #include <string.h>
41 #include <stdlib.h>
42 #include <assert.h>
43 #include <stdarg.h>
44 #include <math.h>
45 
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50 
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54 
55 #include "ihevc_debug.h"
56 #include "ihevc_defs.h"
57 #include "ihevc_structs.h"
58 #include "ihevc_platform_macros.h"
59 #include "ihevc_deblk.h"
60 #include "ihevc_itrans_recon.h"
61 #include "ihevc_chroma_itrans_recon.h"
62 #include "ihevc_chroma_intra_pred.h"
63 #include "ihevc_intra_pred.h"
64 #include "ihevc_inter_pred.h"
65 #include "ihevc_mem_fns.h"
66 #include "ihevc_padding.h"
67 #include "ihevc_weighted_pred.h"
68 #include "ihevc_sao.h"
69 #include "ihevc_resi_trans.h"
70 #include "ihevc_quant_iquant_ssd.h"
71 #include "ihevc_cabac_tables.h"
72 
73 #include "ihevce_defs.h"
74 #include "ihevce_lap_enc_structs.h"
75 #include "ihevce_multi_thrd_structs.h"
76 #include "ihevce_me_common_defs.h"
77 #include "ihevce_had_satd.h"
78 #include "ihevce_error_codes.h"
79 #include "ihevce_bitstream.h"
80 #include "ihevce_cabac.h"
81 #include "ihevce_rdoq_macros.h"
82 #include "ihevce_function_selector.h"
83 #include "ihevce_enc_structs.h"
84 #include "ihevce_entropy_structs.h"
85 #include "ihevce_cmn_utils_instr_set_router.h"
86 #include "ihevce_enc_loop_structs.h"
87 #include "ihevce_inter_pred.h"
88 #include "ihevc_weighted_pred.h"
89 
90 /*****************************************************************************/
91 /* Global tables                                                             */
92 /*****************************************************************************/
93 
94 /**
95 ******************************************************************************
96 * @brief  Table of filter tap coefficients for HEVC luma inter prediction
97 * input   : sub pel mv position (dx/dy = 0:3)
98 * output  : filter coeffs to be used for that position
99 *
100 * @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
101 ******************************************************************************
102 */
103 WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
104                                                     { -1, 4, -10, 58, 17, -5, 1, 0 },
105                                                     { -1, 4, -11, 40, 40, -11, 4, -1 },
106                                                     { 0, 1, -5, 17, 58, -10, 4, -1 } };
107 
108 /**
109 ******************************************************************************
110 * @brief  Table of filter tap coefficients for HEVC chroma inter prediction
111 * input   : chroma sub pel mv position (dx/dy = 0:7)
112 * output  : filter coeffs to be used for that position
113 *
114 * @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
115 The filter uses only the first four elements in each array
116 ******************************************************************************
117 */
118 WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 },    { -2, 58, 10, -2 },
119                                                         { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
120                                                         { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
121                                                         { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
122 
123 /*****************************************************************************/
124 /* Function Definitions                                                      */
125 /*****************************************************************************/
126 
127 /**
128 *******************************************************************************
129 *
130 * @brief
131 *  Performs Luma inter pred based on sub pel position dxdy and store the result
132 *  in a 16 bit destination buffer
133 *
134 * @param[in] pu1_src
135 *  pointer to the source correspoding to integer pel position of a mv (left and
136 *  top justified integer position)
137 *
138 * @param[out] pi2_dst
139 *  WORD16 pointer to the destination
140 *
141 * @param[in] src_strd
142 *  source buffer stride
143 *
144 * @param[in] dst_strd
145 *  destination buffer stride
146 *
147 * @param[in] pi2_hdst_scratch
148 *  scratch buffer for intermediate storage of horizontal filter output; used as
149 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
150 *
151 *  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
152 *
153 * @param[in] ht
154 *  width of the prediction unit
155 *
156 * @param[in] wd
157 *  width of the prediction unit
158 *
159 * @param[in] dx
160 *  qpel position[0:3] of mv in x direction
161 *
162 * @param[in] dy
163 *  qpel position[0:3] of mv in y direction
164 *
165 * @returns
166 *   none
167 *
168 * @remarks
169 *
170 *******************************************************************************
171 */
ihevce_luma_interpolate_16bit_dxdy(UWORD8 * pu1_src,WORD16 * pi2_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)172 void ihevce_luma_interpolate_16bit_dxdy(
173     UWORD8 *pu1_src,
174     WORD16 *pi2_dst,
175     WORD32 src_strd,
176     WORD32 dst_strd,
177     WORD16 *pi2_hdst_scratch,
178     WORD32 ht,
179     WORD32 wd,
180     WORD32 dy,
181     WORD32 dx,
182     func_selector_t *ps_func_selector)
183 {
184     if((0 == dx) && (0 == dy))
185     {
186         /*--------- full pel position : copy input by upscaling-------*/
187 
188         ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
189             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
190     }
191     else if((0 != dx) && (0 != dy))
192     {
193         /*----------sub pel in both x and y direction---------*/
194 
195         UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
196         WORD32 hdst_buf_stride = wd;
197         WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
198 
199         /* horizontal filtering of source done in a scratch buffer first  */
200         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
201             pu1_horz_src,
202             pi2_hdst_scratch,
203             src_strd,
204             hdst_buf_stride,
205             &gai1_hevc_luma_filter_taps[dx][0],
206             (ht + NTAPS_LUMA - 1),
207             wd);
208 
209         /* vertical filtering on scratch buffer and stored in desitnation  */
210         ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
211             pi2_vert_src,
212             pi2_dst,
213             hdst_buf_stride,
214             dst_strd,
215             &gai1_hevc_luma_filter_taps[dy][0],
216             ht,
217             wd);
218     }
219     else if(0 == dy)
220     {
221         /*----------sub pel in x direction only ---------*/
222 
223         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
224             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
225     }
226     else /* if (0 == dx) */
227     {
228         /*----------sub pel in y direction only ---------*/
229 
230         ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
231             pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
232     }
233 }
234 
235 /**
236 *******************************************************************************
237 *
238 * @brief
239 *  Performs Luma inter pred based on sub pel position dxdy and store the result
240 *  in a 8 bit destination buffer
241 *
242 * @param[in] pu1_src
243 *  pointer to the source correspoding to integer pel position of a mv (left and
244 *  top justified integer position)
245 *
246 * @param[out] pu1_dst
247 *  UWORD8 pointer to the destination
248 *
249 * @param[in] src_strd
250 *  source buffer stride
251 *
252 * @param[in] dst_strd
253 *  destination buffer stride
254 *
255 * @param[in] pi2_hdst_scratch
256 *  scratch buffer for intermediate storage of horizontal filter output; used as
257 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
258 *
259 *  Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
260 *
261 * @param[in] ht
262 *  width of the prediction unit
263 *
264 * @param[in] wd
265 *  width of the prediction unit
266 *
267 * @param[in] dx
268 *  qpel position[0:3] of mv in x direction
269 *
270 * @param[in] dy
271 *  qpel position[0:3] of mv in y direction
272 *
273 * @returns
274 *   none
275 *
276 * @remarks
277 *
278 *******************************************************************************
279 */
ihevce_luma_interpolate_8bit_dxdy(UWORD8 * pu1_src,UWORD8 * pu1_dst,WORD32 src_strd,WORD32 dst_strd,WORD16 * pi2_hdst_scratch,WORD32 ht,WORD32 wd,WORD32 dy,WORD32 dx,func_selector_t * ps_func_selector)280 void ihevce_luma_interpolate_8bit_dxdy(
281     UWORD8 *pu1_src,
282     UWORD8 *pu1_dst,
283     WORD32 src_strd,
284     WORD32 dst_strd,
285     WORD16 *pi2_hdst_scratch,
286     WORD32 ht,
287     WORD32 wd,
288     WORD32 dy,
289     WORD32 dx,
290     func_selector_t *ps_func_selector)
291 {
292     if((0 == dx) && (0 == dy))
293     {
294         /*--------- full pel position : copy input as is -------*/
295 
296         ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
297             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
298     }
299     else if((0 != dx) && (0 != dy))
300     {
301         /*----------sub pel in both x and y direction---------*/
302 
303         UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
304         WORD32 hdst_buf_stride = wd;
305         WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
306 
307         /* horizontal filtering of source done in a scratch buffer first  */
308         ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
309             pu1_horz_src,
310             pi2_hdst_scratch,
311             src_strd,
312             hdst_buf_stride,
313             &gai1_hevc_luma_filter_taps[dx][0],
314             (ht + NTAPS_LUMA - 1),
315             wd);
316 
317         /* vertical filtering on scratch buffer and stored in desitnation  */
318         ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
319             pi2_vert_src,
320             pu1_dst,
321             hdst_buf_stride,
322             dst_strd,
323             &gai1_hevc_luma_filter_taps[dy][0],
324             ht,
325             wd);
326     }
327     else if(0 == dy)
328     {
329         /*----------sub pel in x direction only ---------*/
330 
331         ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
332             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
333     }
334     else /* if (0 == dx) */
335     {
336         /*----------sub pel in y direction only ---------*/
337 
338         ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
339             pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
340     }
341 }
342 
343 /**
344 *******************************************************************************
345 *
346 * @brief
347 *  Performs Luma prediction for a inter prediction unit(PU)
348 *
349 * @par Description:
350 *  For a given PU, Inter prediction followed by weighted prediction (if
351 *  required)
352 *
353 * @param[in] ps_inter_pred_ctxt
354 *  context for inter prediction; contains ref list, weight offsets, ctb offsets
355 *
356 * @param[in] ps_pu
357 *  pointer to PU structure whose inter prediction needs to be done
358 *
359 * @param[in] pu1_dst_buf
360 *  pointer to destination buffer where the inter prediction is done
361 *
362 * @param[in] dst_stride
363 *  pitch of the destination buffer
364 *
365 * @returns
366 *   IV_FAIL for mvs going outside ref frame padded limits
367 *   IV_SUCCESS after completing mc for given inter pu
368 *
369 * @remarks
370 *
371 *******************************************************************************
372 */
ihevce_luma_inter_pred_pu(void * pv_inter_pred_ctxt,pu_t * ps_pu,void * pv_dst_buf,WORD32 dst_stride,WORD32 i4_flag_inter_pred_source)373 IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
374     void *pv_inter_pred_ctxt,
375     pu_t *ps_pu,
376     void *pv_dst_buf,
377     WORD32 dst_stride,
378     WORD32 i4_flag_inter_pred_source)
379 {
380     inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
381     func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
382 
383     WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
384     UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
385     WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
386     WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
387 
388     WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
389                      ps_inter_pred_ctxt->i1_weighted_bipred_flag;
390 
391     /* 16bit dest required for interpolate if weighted pred is on or bipred */
392     WORD32 store_16bit_output;
393 
394     recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
395     UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
396     WORD32 ref_pic_stride;
397 
398     /* offset of reference block in integer pel units */
399     WORD32 frm_x_ofst, frm_y_ofst;
400     WORD32 frm_x_pu, frm_y_pu;
401 
402     /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
403     WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
404     WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
405 
406     /* scratch buffer for horizontal interpolation destination */
407     WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
408 
409     WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
410 
411     /* get PU's frm x and frm y offset */
412     frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
413     frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
414 
415     /* sanity checks */
416     ASSERT((wp_flag == 0) || (wp_flag == 1));
417     ASSERT(dst_stride >= pu_wd);
418     ASSERT(ps_pu->b1_intra_flag == 0);
419 
420     lvl_shift0 = 0;
421     lvl_shift1 = 0;
422 
423     if(wp_flag)
424     {
425         UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
426 
427         if(inter_pred_idc != PRED_L1)
428         {
429             ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
430             u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
431         }
432         if(inter_pred_idc != PRED_L0)
433         {
434             ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
435             u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
436         }
437         if(inter_pred_idc == PRED_BI)
438         {
439             wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
440         }
441         else if(inter_pred_idc == PRED_L0)
442         {
443             wp_flag = u1_is_wgt_pred_L0;
444         }
445         else if(inter_pred_idc == PRED_L1)
446         {
447             wp_flag = u1_is_wgt_pred_L1;
448         }
449         else
450         {
451             /*other values are not allowed*/
452             assert(0);
453         }
454     }
455     store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
456 
457     if(inter_pred_idc != PRED_L1)
458     {
459         /*****************************************************/
460         /*              L0 inter prediction                  */
461         /*****************************************************/
462 
463         /* motion vecs in qpel precision                    */
464         WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
465         WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
466 
467         /* sub pel offsets in x and y direction w.r.t integer pel   */
468         WORD32 dx = mv_x & 0x3;
469         WORD32 dy = mv_y & 0x3;
470 
471         /* ref idx is currently stored in the lower 4bits           */
472         WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
473 
474         /*  x and y integer offsets w.r.t frame start               */
475         frm_x_ofst = (frm_x_pu + (mv_x >> 2));
476         frm_y_ofst = (frm_y_pu + (mv_y >> 2));
477 
478         ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
479 
480         /* picture buffer start and stride */
481         if(i4_flag_inter_pred_source == 1)
482         {
483             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
484         }
485         else
486         {
487             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
488         }
489         ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
490 
491         /* Error check for mvs going out of ref frame padded limits */
492         {
493             WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
494             WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
495 
496             min_x =
497                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
498                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
499                       : (PAD_HORZ - 4));
500 
501             max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
502                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
503                          : (PAD_HORZ - 4);
504 
505             min_y =
506                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
507                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
508                       : (PAD_VERT - 4));
509 
510             max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
511                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
512                          : (PAD_VERT - 4);
513 
514             if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
515                 //ASSERT(0);
516                 return (IV_FAIL);
517 
518             if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
519                 //ASSERT(0);
520                 return (IV_FAIL);
521         }
522 
523         /* point to reference start location in ref frame           */
524         /* Assuming clipping of mv is not required here as ME would */
525         /* take care of mv access not going beyond padded data      */
526         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
527 
528         /* level shifted for subpel with both x and y componenet being non 0 */
529         /* this is because the interpolate function subtract this to contain */
530         /* the resulting data in 16 bits                                     */
531         lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
532 
533         if(store_16bit_output)
534         {
535             /* do interpolation in 16bit L0 scratch buffer */
536             ihevce_luma_interpolate_16bit_dxdy(
537                 pu1_ref_int_pel,
538                 pi2_scr_buf_l0,
539                 ref_pic_stride,
540                 pu_wd,
541                 pi2_horz_scratch,
542                 pu_ht,
543                 pu_wd,
544                 dy,
545                 dx,
546                 ps_func_selector);
547         }
548         else
549         {
550             /* do interpolation in 8bit destination buffer and return */
551             ihevce_luma_interpolate_8bit_dxdy(
552                 pu1_ref_int_pel,
553                 pu1_dst_buf,
554                 ref_pic_stride,
555                 dst_stride,
556                 pi2_horz_scratch,
557                 pu_ht,
558                 pu_wd,
559                 dy,
560                 dx,
561                 ps_func_selector);
562 
563             return (IV_SUCCESS);
564         }
565     }
566 
567     if(inter_pred_idc != PRED_L0)
568     {
569         /*****************************************************/
570         /*      L1 inter prediction                          */
571         /*****************************************************/
572 
573         /* motion vecs in qpel precision                            */
574         WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
575         WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
576 
577         /* sub pel offsets in x and y direction w.r.t integer pel   */
578         WORD32 dx = mv_x & 0x3;
579         WORD32 dy = mv_y & 0x3;
580 
581         /* ref idx is currently stored in the lower 4bits           */
582         WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
583 
584         /*  x and y integer offsets w.r.t frame start               */
585         frm_x_ofst = (frm_x_pu + (mv_x >> 2));
586         frm_y_ofst = (frm_y_pu + (mv_y >> 2));
587 
588         ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
589 
590         /* picture buffer start and stride */
591 
592         if(i4_flag_inter_pred_source == 1)
593         {
594             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
595         }
596         else
597         {
598             pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
599         }
600         ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
601 
602         /* Error check for mvs going out of ref frame padded limits */
603         {
604             WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
605             WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
606 
607             min_x =
608                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
609                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
610                       : (PAD_HORZ - 4));
611 
612             max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
613                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
614                          : (PAD_HORZ - 4);
615 
616             min_y =
617                 -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
618                       ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
619                       : (PAD_VERT - 4));
620 
621             max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
622                          ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
623                          : (PAD_VERT - 4);
624 
625             if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
626                 //ASSERT(0);
627                 return (IV_FAIL);
628 
629             if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
630                 //ASSERT(0);
631                 return (IV_FAIL);
632         }
633 
634         /* point to reference start location in ref frame           */
635         /* Assuming clipping of mv is not required here as ME would */
636         /* take care of mv access not going beyond padded data      */
637         pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
638 
639         /* level shifted for subpel with both x and y componenet being non 0 */
640         /* this is because the interpolate function subtract this to contain */
641         /* the resulting data in 16 bits                                     */
642         lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
643 
644         if(store_16bit_output)
645         {
646             /* do interpolation in 16bit L1 scratch buffer */
647             ihevce_luma_interpolate_16bit_dxdy(
648                 pu1_ref_int_pel,
649                 pi2_scr_buf_l1,
650                 ref_pic_stride,
651                 pu_wd,
652                 pi2_horz_scratch,
653                 pu_ht,
654                 pu_wd,
655                 dy,
656                 dx,
657                 ps_func_selector);
658         }
659         else
660         {
661             /* do interpolation in 8bit destination buffer and return */
662             ihevce_luma_interpolate_8bit_dxdy(
663                 pu1_ref_int_pel,
664                 pu1_dst_buf,
665                 ref_pic_stride,
666                 dst_stride,
667                 pi2_horz_scratch,
668                 pu_ht,
669                 pu_wd,
670                 dy,
671                 dx,
672                 ps_func_selector);
673 
674             return (IV_SUCCESS);
675         }
676     }
677 
678     if((inter_pred_idc != PRED_BI) && wp_flag)
679     {
680         /*****************************************************/
681         /*      unidirection weighted prediction             */
682         /*****************************************************/
683         ihevce_wght_offst_t *ps_weight_offset;
684         WORD16 *pi2_src;
685         WORD32 lvl_shift;
686 
687         /* intialize the weight, offsets and ref based on l0/l1 mode */
688         if(inter_pred_idc == PRED_L0)
689         {
690             pi2_src = pi2_scr_buf_l0;
691             ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
692             lvl_shift = lvl_shift0;
693         }
694         else
695         {
696             pi2_src = pi2_scr_buf_l1;
697             ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
698             lvl_shift = lvl_shift1;
699         }
700 
701         wgt0 = ps_weight_offset->i2_luma_weight;
702         off0 = ps_weight_offset->i2_luma_offset;
703         shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
704 
705         /* do the uni directional weighted prediction */
706         ps_func_selector->ihevc_weighted_pred_uni_fptr(
707             pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
708     }
709     else
710     {
711         /*****************************************************/
712         /*              Bipred  prediction                   */
713         /*****************************************************/
714 
715         if(wp_flag)
716         {
717             /*****************************************************/
718             /*      Bi pred  weighted prediction                 */
719             /*****************************************************/
720             wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
721             off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
722 
723             wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
724             off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
725 
726             shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
727 
728             ps_func_selector->ihevc_weighted_pred_bi_fptr(
729                 pi2_scr_buf_l0,
730                 pi2_scr_buf_l1,
731                 pu1_dst_buf,
732                 pu_wd,
733                 pu_wd,
734                 dst_stride,
735                 wgt0,
736                 off0,
737                 wgt1,
738                 off1,
739                 shift,
740                 lvl_shift0,
741                 lvl_shift1,
742                 pu_ht,
743                 pu_wd);
744         }
745         else
746         {
747             /*****************************************************/
748             /*          Default Bi pred  prediction              */
749             /*****************************************************/
750             ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
751                 pi2_scr_buf_l0,
752                 pi2_scr_buf_l1,
753                 pu1_dst_buf,
754                 pu_wd,
755                 pu_wd,
756                 dst_stride,
757                 lvl_shift0,
758                 lvl_shift1,
759                 pu_ht,
760                 pu_wd);
761         }
762     }
763 
764     return (IV_SUCCESS);
765 }
766 
767 /**
768 *******************************************************************************
769 *
770 * @brief
771 *  Performs Chroma inter pred based on sub pel position dxdy and store the
772 *  result in a 16 bit destination buffer
773 *
774 * @param[in] pu1_src
775 *  pointer to the source correspoding to integer pel position of a mv (left and
776 *  top justified integer position)
777 *
778 * @param[out] pi2_dst
779 *  WORD16 pointer to the destination
780 *
781 * @param[in] src_strd
782 *  source buffer stride
783 *
784 * @param[in] dst_strd
785 *  destination buffer stride
786 *
787 * @param[in] pi2_hdst_scratch
788 *  scratch buffer for intermediate storage of horizontal filter output; used as
789 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
790 *
791 *  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
792 *
793 * @param[in] ht
794 *  width of the prediction unit
795 *
796 * @param[in] wd
797 *  width of the prediction unit
798 *
799 * @param[in] dx
800 *  1/8th pel position[0:7] of mv in x direction
801 *
802 * @param[in] dy
803 *  1/8th pel position[0:7] of mv in y direction
804 *
805 * @returns
806 *   none
807 *
808 * @remarks
809 *
810 *******************************************************************************
811 */
void ihevce_chroma_interpolate_16bit_dxdy(
    UWORD8 *pu1_src,
    WORD16 *pi2_dst,
    WORD32 src_strd,
    WORD32 dst_strd,
    WORD16 *pi2_hdst_scratch,
    WORD32 ht,
    WORD32 wd,
    WORD32 dy,
    WORD32 dx,
    func_selector_t *ps_func_selector)
{
    /* Select the interpolation kernel from the two sub pel components; */
    /* every path writes a 16 bit result into pi2_dst                   */
    const WORD32 has_frac_x = (0 != dx);
    const WORD32 has_frac_y = (0 != dy);

    if(has_frac_x && has_frac_y)
    {
        /*------ fractional in x and y : horizontal pass, then vertical ------*/

        /* scratch rows hold interleaved uv samples */
        WORD32 scratch_strd = (wd << 1);

        /* horizontal pass starts one source row above the block and      */
        /* produces (ht + NTAPS_CHROMA - 1) rows for the vertical filter  */
        UWORD8 *pu1_src_row_above = pu1_src - src_strd;

        /* vertical pass is handed the scratch buffer advanced by one row */
        WORD16 *pi2_scratch_row1 = pi2_hdst_scratch + scratch_strd;

        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
            pu1_src_row_above,
            pi2_hdst_scratch,
            src_strd,
            scratch_strd,
            gai1_hevc_chroma_filter_taps[dx],
            (ht + NTAPS_CHROMA - 1),
            wd);

        ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
            pi2_scratch_row1,
            pi2_dst,
            scratch_strd,
            dst_strd,
            gai1_hevc_chroma_filter_taps[dy],
            ht,
            wd);
    }
    else if(has_frac_x)
    {
        /*------ fractional in x only : horizontal filter ------*/
        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[dx], ht, wd);
    }
    else if(has_frac_y)
    {
        /*------ fractional in y only : vertical filter ------*/
        ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[dy], ht, wd);
    }
    else
    {
        /*------ full pel position : copy with 16 bit output ------*/
        ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
            pu1_src, pi2_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[0], ht, wd);
    }
}
874 
875 /**
876 *******************************************************************************
877 *
878 * @brief
879 *  Performs Chroma inter pred based on sub pel position dxdy and store the
880 *  result in a 8 bit destination buffer
881 *
882 * @param[in] pu1_src
883 *  pointer to the source corresponding to integer pel position of a mv (left and
884 *  top justified integer position)
885 *
886 * @param[out] pu1_dst
887 *  UWORD8 pointer to the destination
888 *
889 * @param[in] src_strd
890 *  source buffer stride
891 *
892 * @param[in] dst_strd
893 *  destination buffer stride
894 *
895 * @param[in] pi2_hdst_scratch
896 *  scratch buffer for intermediate storage of horizontal filter output; used as
897 *  input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
898 *
899 *  Max scratch buffer required is w * (h + 3) * sizeof(WORD16)
900 *
901 * @param[in] ht
902 *  height of the prediction unit
903 *
904 * @param[in] wd
905 *  width of the prediction unit
906 *
907 * @param[in] dx
908 *  1/8th pel position[0:7] of mv in x direction
909 *
910 * @param[in] dy
911 *  1/8th pel position[0:7] of mv in y direction
912 *
913 * @returns
914 *   none
915 *
916 * @remarks
917 *
918 *******************************************************************************
919 */
void ihevce_chroma_interpolate_8bit_dxdy(
    UWORD8 *pu1_src,
    UWORD8 *pu1_dst,
    WORD32 src_strd,
    WORD32 dst_strd,
    WORD16 *pi2_hdst_scratch,
    WORD32 ht,
    WORD32 wd,
    WORD32 dy,
    WORD32 dx,
    func_selector_t *ps_func_selector)
{
    /* Select the interpolation kernel from the two sub pel components; */
    /* every path writes an 8 bit result into pu1_dst                   */
    const WORD32 has_frac_x = (0 != dx);
    const WORD32 has_frac_y = (0 != dy);

    if(has_frac_x && has_frac_y)
    {
        /*------ fractional in x and y : horizontal pass, then vertical ------*/

        /* scratch rows hold interleaved uv samples */
        WORD32 scratch_strd = (wd << 1);

        /* horizontal pass starts one source row above the block and      */
        /* produces (ht + NTAPS_CHROMA - 1) rows for the vertical filter  */
        UWORD8 *pu1_src_row_above = pu1_src - src_strd;

        /* vertical pass is handed the scratch buffer advanced by one row */
        WORD16 *pi2_scratch_row1 = pi2_hdst_scratch + scratch_strd;

        /* 16 bit intermediate goes to the scratch buffer */
        ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
            pu1_src_row_above,
            pi2_hdst_scratch,
            src_strd,
            scratch_strd,
            gai1_hevc_chroma_filter_taps[dx],
            (ht + NTAPS_CHROMA - 1),
            wd);

        /* 16 bit intermediate in, 8 bit destination out */
        ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
            pi2_scratch_row1,
            pu1_dst,
            scratch_strd,
            dst_strd,
            gai1_hevc_chroma_filter_taps[dy],
            ht,
            wd);
    }
    else if(has_frac_x)
    {
        /*------ fractional in x only : horizontal filter ------*/
        ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[dx], ht, wd);
    }
    else if(has_frac_y)
    {
        /*------ fractional in y only : vertical filter ------*/
        ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[dy], ht, wd);
    }
    else
    {
        /*------ full pel position : copy input as is ------*/
        ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
            pu1_src, pu1_dst, src_strd, dst_strd, gai1_hevc_chroma_filter_taps[0], ht, wd);
    }
}
978 
979 /**
980 *******************************************************************************
981 *
982 * @brief
983 *  Performs Chroma prediction for a inter prediction unit(PU)
984 *
985 * @par Description:
986 *  For a given PU, Inter prediction followed by weighted prediction (if
987 *  required). The reference and destination buffers are uv interleaved
988 *
989 * @param[in] pv_inter_pred_ctxt
990 *  context for inter prediction; contains ref list, weight offsets, ctb offsets
991 *
992 * @param[in] ps_pu
993 *  pointer to PU structure whose inter prediction needs to be done
994 *
995 * @param[out] pu1_dst_buf
996 *  pointer to destination buffer where the inter prediction output is stored
997 *
998 * @param[in] dst_stride
999 *  pitch of the destination buffer
1000 *
1001 * @returns
1002 *   none
1003 *
1004 * @remarks
1005 *
1006 *******************************************************************************
1007 */
void ihevce_chroma_inter_pred_pu(
    void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
{
    /*
     * Flow:
     *   1. decide whether weighted prediction applies to this PU
     *   2. interpolate from L0 and/or L1; un-weighted uni prediction is
     *      written straight to the 8 bit destination and the function returns
     *   3. otherwise combine the 16 bit scratch output(s) through the uni,
     *      bi-weighted or default-bi kernel into the 8 bit destination
     */
    inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
    func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;

    WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
    UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);

    /* chroma PU dimensions; height is doubled when chroma_array_type is 2 */
    WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
    WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);

    /* stride of the 16 bit scratch buffers : uv interleaved */
    WORD32 scr_buf_strd = (pu_wd_chroma << 1);

    WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
                     ps_inter_pred_ctxt->i1_weighted_bipred_flag;

    /* 16bit dest required for interpolate if weighted pred is on or bipred */
    WORD32 store_16bit_output;

    /* per list state, index 0 is L0 and index 1 is L1 */
    recon_pic_buf_t *aps_ref_pic[2] = { NULL, NULL };
    WORD16 *api2_scr_buf[2];

    /* scratch buffer for horizontal interpolation destination */
    WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];

    /* mask selecting the fractional bits of mv_y and shift giving its */
    /* integer part; both depend on whether the chroma format is 422   */
    WORD32 dy_frac_mask = (1 << (!u1_is_422 + 2)) - 1;
    WORD32 mv_y_int_shift = 3 - u1_is_422;

    WORD32 frm_x_pu, frm_y_pu;
    WORD32 list;

    /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
    api2_scr_buf[0] = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
    api2_scr_buf[1] = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];

    /* get PU's frm x and frm y offset : Note uv is interleaved */
    frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
    frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
               (ps_pu->b4_pos_y << (u1_is_422 + 1));

    /* sanity checks */
    ASSERT((wp_flag == 0) || (wp_flag == 1));
    ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
    ASSERT(ps_pu->b1_intra_flag == 0);

    /*****************************************************/
    /*  Step 1 : refine wp_flag with the per reference   */
    /*           chroma weight enable flags              */
    /*****************************************************/
    if(wp_flag)
    {
        UWORD8 u1_is_wgt_pred_L0 = 0;
        UWORD8 u1_is_wgt_pred_L1 = 0;

        /* a list's ref idx is only looked up when that list is in use */
        if(inter_pred_idc != PRED_L1)
        {
            aps_ref_pic[0] = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
            u1_is_wgt_pred_L0 = aps_ref_pic[0]->s_weight_offset.u1_chroma_weight_enable_flag;
        }
        if(inter_pred_idc != PRED_L0)
        {
            aps_ref_pic[1] = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
            u1_is_wgt_pred_L1 = aps_ref_pic[1]->s_weight_offset.u1_chroma_weight_enable_flag;
        }

        if(inter_pred_idc == PRED_BI)
        {
            wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
        }
        else if(inter_pred_idc == PRED_L0)
        {
            wp_flag = u1_is_wgt_pred_L0;
        }
        else if(inter_pred_idc == PRED_L1)
        {
            wp_flag = u1_is_wgt_pred_L1;
        }
        else
        {
            /*other values are not allowed*/
            assert(0);
        }
    }
    store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);

    /*****************************************************/
    /*  Step 2 : L0 and/or L1 inter prediction (Chroma)  */
    /*****************************************************/
    for(list = 0; list < 2; list++)
    {
        WORD32 mv_x, mv_y, ref_idx;
        WORD32 dx, dy;
        WORD32 frm_x_ofst, frm_y_ofst;
        WORD32 ref_pic_stride;
        UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;

        if(0 == list)
        {
            if(inter_pred_idc == PRED_L1)
            {
                continue; /* L0 not used by this PU */
            }
            mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
            mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
            ref_idx = ps_pu->mv.i1_l0_ref_idx;
        }
        else
        {
            if(inter_pred_idc == PRED_L0)
            {
                continue; /* L1 not used by this PU */
            }
            mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
            mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
            ref_idx = ps_pu->mv.i1_l1_ref_idx;
        }

        /* sub pel offsets in x and y direction w.r.t integer pel; these   */
        /* are used as row indices into the chroma filter tap table        */
        dx = mv_x & 0x7;
        dy = (mv_y & dy_frac_mask) << u1_is_422;

        /* x and y integer offsets w.r.t frame start.                      */
        /* x is doubled for uv interleaving; a multiply is used instead of */
        /* a left shift because the integer part of mv_x may be negative   */
        /* and left shifting a negative value is undefined behaviour in C  */
        frm_x_ofst = frm_x_pu + ((mv_x >> 3) * 2);
        frm_y_ofst = frm_y_pu + (mv_y >> mv_y_int_shift);

        aps_ref_pic[list] = ps_inter_pred_ctxt->ps_ref_list[list][ref_idx];

        /* picture buffer start and stride */
        pu1_ref_pic = (UWORD8 *)aps_ref_pic[list]->s_yuv_buf_desc.pv_u_buf;
        ref_pic_stride = aps_ref_pic[list]->s_yuv_buf_desc.i4_uv_strd;

        /* point to reference start location in ref frame           */
        /* Assuming clipping of mv is not required here as ME would */
        /* take care of mv access not going beyond padded data      */
        pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);

        if(!store_16bit_output)
        {
            /* un-weighted uni prediction : interpolate directly into */
            /* the 8bit destination buffer and return                 */
            ihevce_chroma_interpolate_8bit_dxdy(
                pu1_ref_int_pel,
                pu1_dst_buf,
                ref_pic_stride,
                dst_stride,
                pi2_horz_scratch,
                pu_ht_chroma,
                pu_wd_chroma,
                dy,
                dx,
                ps_func_selector);

            return;
        }

        /* do interpolation in this list's 16bit scratch buffer */
        ihevce_chroma_interpolate_16bit_dxdy(
            pu1_ref_int_pel,
            api2_scr_buf[list],
            ref_pic_stride,
            scr_buf_strd,
            pi2_horz_scratch,
            pu_ht_chroma,
            pu_wd_chroma,
            dy,
            dx,
            ps_func_selector);
    }

    /*****************************************************/
    /*  Step 3 : combine 16 bit output(s) into 8 bit dst */
    /*****************************************************/
    if(!wp_flag)
    {
        /*****************************************************/
        /*          Default Bi pred  prediction (Chroma)     */
        /*****************************************************/
        /* un-weighted uni prediction has already returned above */
        ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
            api2_scr_buf[0],
            api2_scr_buf[1],
            pu1_dst_buf,
            scr_buf_strd,
            scr_buf_strd,
            dst_stride,
            0,
            0,
            pu_ht_chroma,
            pu_wd_chroma);
    }
    else if(inter_pred_idc == PRED_BI)
    {
        /*****************************************************/
        /*      Bi pred  weighted prediction (Chroma)        */
        /*****************************************************/
        ihevce_wght_offst_t *ps_wt_ofst_l0 = &aps_ref_pic[0]->s_weight_offset;
        ihevce_wght_offst_t *ps_wt_ofst_l1 = &aps_ref_pic[1]->s_weight_offset;

        /* one more bit of shift than the uni directional case */
        WORD32 shift =
            ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;

        ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
            api2_scr_buf[0],
            api2_scr_buf[1],
            pu1_dst_buf,
            scr_buf_strd,
            scr_buf_strd,
            dst_stride,
            ps_wt_ofst_l0->i2_cb_weight,
            ps_wt_ofst_l0->i2_cr_weight,
            ps_wt_ofst_l0->i2_cb_offset,
            ps_wt_ofst_l0->i2_cr_offset,
            ps_wt_ofst_l1->i2_cb_weight,
            ps_wt_ofst_l1->i2_cr_weight,
            ps_wt_ofst_l1->i2_cb_offset,
            ps_wt_ofst_l1->i2_cr_offset,
            shift,
            0,
            0,
            pu_ht_chroma,
            pu_wd_chroma);
    }
    else
    {
        /*****************************************************/
        /*      unidirection weighted prediction(Chroma)     */
        /*****************************************************/
        /* pick the weights, offsets and source based on l0/l1 mode */
        WORD32 src_list = (inter_pred_idc == PRED_L0) ? 0 : 1;
        ihevce_wght_offst_t *ps_weight_offset = &aps_ref_pic[src_list]->s_weight_offset;
        WORD32 lvl_shift = 0;
        WORD32 shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;

        /* do the uni directional weighted prediction */
        ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
            api2_scr_buf[src_list],
            pu1_dst_buf,
            scr_buf_strd,
            dst_stride,
            ps_weight_offset->i2_cb_weight,
            ps_weight_offset->i2_cr_weight,
            ps_weight_offset->i2_cb_offset,
            ps_weight_offset->i2_cr_offset,
            shift,
            lvl_shift,
            pu_ht_chroma,
            pu_wd_chroma);
    }
}
1331