1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 **************************************************************************
22 * \file ih264d_inter_pred.c
23 *
24 * \brief
25 * This file contains routines to perform MotionCompensation tasks
26 *
27 * Detailed_description
28 *
29 * \date
30 * 20/11/2002
31 *
32 * \author Arvind Raman
33 **************************************************************************
34 */
35
36 #include <string.h>
37 #include "ih264d_defs.h"
38 #include "ih264d_mvpred.h"
39 #include "ih264d_error_handler.h"
40 #include "ih264d_structs.h"
41 #include "ih264d_defs.h"
42 #include "ih264d_inter_pred.h"
43 #include "ih264_typedefs.h"
44 #include "ih264_macros.h"
45 #include "ih264_platform_macros.h"
46 #include "ih264d_debug.h"
47 #include "ih264d_tables.h"
48 #include "ih264d_mb_utils.h"
49
50
51 void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk);
52
53
54
/*!
 **************************************************************************
 * \if Function name : ih264d_copy_multiplex_data \endif
 *
 * \brief
 *    Copies uc_h rows of uc_w bytes each from puc_Source to puc_To.
 *    After every row the source pointer is advanced by ui16_sourceWidth
 *    bytes and the destination pointer by ui16_toWidth bytes.
 *
 * \param puc_Source       : first byte of the first source row
 * \param puc_To           : first byte of the first destination row
 * \param uc_w             : number of bytes copied per row
 * \param uc_h             : number of rows copied
 * \param ui16_sourceWidth : distance in bytes between consecutive source rows
 * \param ui16_toWidth     : distance in bytes between consecutive
 *                           destination rows
 *
 * \return
 *    None
 *
 * \note
 *    Rows are copied with memcpy, so a source row must not overlap the
 *    destination row it is copied to.
 **************************************************************************
 */
void ih264d_copy_multiplex_data(UWORD8 *puc_Source,
                                UWORD8 *puc_To,
                                UWORD32 uc_w,
                                UWORD32 uc_h,
                                UWORD32 ui16_sourceWidth,
                                UWORD32 ui16_toWidth)
{
    /* The counter has the same width as uc_h: an 8-bit counter would wrap */
    /* before reaching any height above 255 and the loop would never end.  */
    UWORD32 u4_row;

    for(u4_row = 0; u4_row < uc_h; u4_row++)
    {
        memcpy(puc_To, puc_Source, uc_w);
        puc_To += ui16_toWidth;
        puc_Source += ui16_sourceWidth;
    }
}
71
72
73 /*!
74 **************************************************************************
75 * \if Function name : dma_2d1d \endif
76 *
77 * \brief
78 * 2D -> 1D linear DMA into the reference buffers
79 *
80 * \return
81 * None
82 **************************************************************************
83 */
ih264d_copy_2d1d(UWORD8 * puc_src,UWORD8 * puc_dest,UWORD16 ui16_srcWidth,UWORD16 ui16_widthToFill,UWORD16 ui16_heightToFill)84 void ih264d_copy_2d1d(UWORD8 *puc_src,
85 UWORD8 *puc_dest,
86 UWORD16 ui16_srcWidth,
87 UWORD16 ui16_widthToFill,
88 UWORD16 ui16_heightToFill)
89 {
90 UWORD32 uc_w, uc_h;
91 for(uc_h = ui16_heightToFill; uc_h != 0; uc_h--)
92 {
93 memcpy(puc_dest, puc_src, ui16_widthToFill);
94 puc_dest += ui16_widthToFill;
95 puc_src += ui16_srcWidth;
96 }
97 }
98
99 /*!
100 **************************************************************************
101 * \if Function name : ih264d_fill_pred_info \endif
102 *
103 * \brief
104 * Fills inter prediction related info
105 *
106 * \return
107 * None
108 **************************************************************************
109 */
ih264d_fill_pred_info(WORD16 * pi2_mv,WORD32 part_width,WORD32 part_height,WORD32 sub_mb_num,WORD32 pred_dir,pred_info_pkd_t * ps_pred_pkd,WORD8 i1_buf_id,WORD8 i1_ref_idx,UWORD32 * pu4_wt_offset,UWORD8 u1_pic_type)110 void ih264d_fill_pred_info(WORD16 *pi2_mv,WORD32 part_width,WORD32 part_height, WORD32 sub_mb_num,
111 WORD32 pred_dir,pred_info_pkd_t *ps_pred_pkd,WORD8 i1_buf_id,
112 WORD8 i1_ref_idx,UWORD32 *pu4_wt_offset,UWORD8 u1_pic_type)
113 {
114 WORD32 insert_bits;
115
116 ps_pred_pkd->i2_mv[0] = pi2_mv[0];
117 ps_pred_pkd->i2_mv[1] = pi2_mv[1];
118
119 insert_bits = sub_mb_num & 3; /*sub mb x*/
120 ps_pred_pkd->i1_size_pos_info = insert_bits;
121 insert_bits = sub_mb_num >> 2;/*sub mb y*/
122 ps_pred_pkd->i1_size_pos_info |= insert_bits << 2;
123 insert_bits = part_width >> 1;
124 ps_pred_pkd->i1_size_pos_info |= insert_bits << 4;
125 insert_bits = part_height >> 1;
126 ps_pred_pkd->i1_size_pos_info |= insert_bits << 6;
127
128 ps_pred_pkd->i1_ref_idx_info = i1_ref_idx;
129 ps_pred_pkd->i1_ref_idx_info |= (pred_dir << 6);
130 ps_pred_pkd->i1_buf_id = i1_buf_id;
131 ps_pred_pkd->pu4_wt_offst = pu4_wt_offset;
132 ps_pred_pkd->u1_pic_type = u1_pic_type;
133
134
135 }
136
137
138
139
140
141
142
143 /*****************************************************************************/
144 /* \if Function name : formMbPartInfo \endif */
145 /* */
146 /* \brief */
147 /* Form the Mb partition information structure, to be used by the MC */
148 /* routine */
149 /* */
150 /* \return */
151 /* None */
152 /* \note */
153 /* c_bufx is used to select PredBuffer, */
154 /* if it's only Forward/Backward prediction always buffer used is */
155 /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */
156 /* pu1_mb_cr_pred_buffer[0 to X1] */
157 /* */
158 /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */
159 /* ..PredBuffer[X2 to X3] for backward prediction. and */
160 /* */
161 /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/
162 /* and ..PredBuffer[X2 to X3] */
163 /* */
164 /* X1 is 255 for Luma and 63 for Chroma */
165 /* X2 is 256 for Luma and 64 for Chroma */
166 /* X3 is 511 for Luma and 127 for Chroma */
167 /* */
168 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
169 /* 11 05 2005 SWRN Modified to handle pod */
170 /*****************************************************************************/
171
ih264d_form_mb_part_info_bp(pred_info_pkd_t * ps_pred_pkd,dec_struct_t * ps_dec,UWORD16 u2_mb_x,UWORD16 u2_mb_y,WORD32 mb_index,dec_mb_info_t * ps_cur_mb_info)172 WORD32 ih264d_form_mb_part_info_bp(pred_info_pkd_t *ps_pred_pkd,
173 dec_struct_t * ps_dec,
174 UWORD16 u2_mb_x,
175 UWORD16 u2_mb_y,
176 WORD32 mb_index,
177 dec_mb_info_t *ps_cur_mb_info)
178 {
179 /* The reference buffer pointer */
180 WORD32 i2_frm_x, i2_frm_y;
181 WORD32 i2_tmp_mv_x, i2_tmp_mv_y;
182 WORD32 i2_rec_x, i2_rec_y;
183
184 WORD32 u2_pic_ht;
185 WORD32 u2_frm_wd;
186 WORD32 u2_rec_wd;
187 UWORD8 u1_sub_x = 0,u1_sub_y=0 ;
188 UWORD8 u1_part_wd = 0,u1_part_ht = 0;
189 WORD16 i2_mv_x,i2_mv_y;
190
191 /********************************************/
192 /* i1_mc_wd width reqd for mcomp */
193 /* u1_dma_ht height reqd for mcomp */
194 /* u1_dma_wd width aligned to 4 bytes */
195 /* u1_dx fractional part of width */
196 /* u1_dx fractional part of height */
197 /********************************************/
198 UWORD32 i1_mc_wd;
199
200 WORD32 u1_dma_ht;
201
202 UWORD32 u1_dma_wd;
203 UWORD32 u1_dx;
204 UWORD32 u1_dy;
205 pred_info_t * ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
206 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
207 tfr_ctxt_t *ps_frame_buf;
208 struct pic_buffer_t *ps_ref_frm;
209 UWORD8 u1_scale_ref,u1_mbaff,u1_field;
210 pic_buffer_t **pps_ref_frame;
211 WORD8 i1_size_pos_info,i1_buf_id;
212
213 PROFILE_DISABLE_MB_PART_INFO()
214
215 UNUSED(ps_cur_mb_info);
216 i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
217 GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
218 GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
219 GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
220 GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
221 i2_mv_x = ps_pred_pkd->i2_mv[0];
222 i2_mv_y = ps_pred_pkd->i2_mv[1];
223 i1_buf_id = ps_pred_pkd->i1_buf_id;
224
225
226 ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
227
228
229 {
230 ps_frame_buf = &ps_dec->s_tran_addrecon;
231 }
232
233
234 /* Transfer Setup Y */
235 {
236 UWORD8 *pu1_pred, *pu1_rec;
237
238 /* calculating rounded motion vectors and fractional components */
239 i2_tmp_mv_x = i2_mv_x;
240 i2_tmp_mv_y = i2_mv_y;
241 u1_dx = i2_tmp_mv_x & 0x3;
242 u1_dy = i2_tmp_mv_y & 0x3;
243 i2_tmp_mv_x >>= 2;
244 i2_tmp_mv_y >>= 2;
245 i1_mc_wd = u1_part_wd << 2;
246 u1_dma_ht = u1_part_ht << 2;
247 if(u1_dx)
248 {
249 i2_tmp_mv_x -= 2;
250 i1_mc_wd += 5;
251 }
252 if(u1_dy)
253 {
254 i2_tmp_mv_y -= 2;
255 u1_dma_ht += 5;
256 }
257
258 /********************************************************************/
259 /* Calulating the horizontal and the vertical u4_ofst from top left */
260 /* edge of the reference frame, and subsequent clipping */
261 /********************************************************************/
262 u2_pic_ht = ps_dec->u2_pic_ht;
263 u2_frm_wd = ps_dec->u2_frm_wd_y;
264 i2_rec_x = u1_sub_x << 2;
265 i2_rec_y = u1_sub_y << 2;
266
267 i2_frm_x = (u2_mb_x << 4) + i2_rec_x + i2_tmp_mv_x;
268 i2_frm_y = (u2_mb_y << 4) + i2_rec_y + i2_tmp_mv_y;
269
270 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
271 i2_frm_x);
272 i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
273
274 pu1_pred = ps_ref_frm->pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
275
276 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
277
278 /********************************************************************/
279 /* Calulating the horizontal and the vertical u4_ofst from top left */
280 /* edge of the recon buffer */
281 /********************************************************************/
282 u2_rec_wd = MB_SIZE;
283 {
284 u2_rec_wd = ps_dec->u2_frm_wd_y;
285 i2_rec_x += (mb_index << 4);
286 pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
287 + i2_rec_x;
288 }
289
290 /* filling the pred and dma structures for Y */
291 u2_frm_wd = ps_dec->u2_frm_wd_y;
292
293 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
294 ps_pred->i1_dma_ht = u1_dma_ht;
295 ps_pred->i1_mc_wd = i1_mc_wd;
296 ps_pred->u2_frm_wd = u2_frm_wd;
297 ps_pred->pu1_rec_y_u = pu1_rec;
298 ps_pred->u2_dst_stride = u2_rec_wd;
299
300 ps_pred->i1_mb_partwidth = u1_part_wd << 2;
301 ps_pred->i1_mb_partheight = u1_part_ht << 2;
302 ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
303
304 ps_pred->pu1_y_ref = pu1_pred;
305
306 }
307
308 /* Increment ps_pred index */
309 ps_pred++;
310
311 /* Transfer Setup U & V */
312 {
313 WORD32 i4_ref_offset, i4_rec_offset;
314 UWORD8 *pu1_pred_u, *pu1_pred_v;
315
316
317 /* calculating rounded motion vectors and fractional components */
318 i2_tmp_mv_x = i2_mv_x;
319 i2_tmp_mv_y = i2_mv_y;
320
321 /************************************************************************/
322 /* Table 8-9: Derivation of the vertical component of the chroma vector */
323 /* in field coding mode */
324 /************************************************************************/
325
326 /* Eighth sample of the chroma MV */
327 u1_dx = i2_tmp_mv_x & 0x7;
328 u1_dy = i2_tmp_mv_y & 0x7;
329
330 /********************************************************************/
331 /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
332 /* MV in full pel units */
333 /********************************************************************/
334 i2_mv_x = i2_tmp_mv_x;
335 i2_mv_y = i2_tmp_mv_y;
336 i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
337 i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
338 i1_mc_wd = u1_part_wd << 1;
339 u1_dma_ht = u1_part_ht << 1;
340 if(u1_dx)
341 {
342 i2_tmp_mv_x -= (i2_mv_x < 0);
343 i1_mc_wd++;
344 }
345 if(u1_dy != 0)
346 {
347 i2_tmp_mv_y -= (i2_mv_y < 0);
348 u1_dma_ht++;
349 }
350
351 /********************************************************************/
352 /* Calulating the horizontal and the vertical u4_ofst from top left */
353 /* edge of the reference frame, and subsequent clipping */
354 /********************************************************************/
355 u2_pic_ht >>= 1;
356 u2_frm_wd = ps_dec->u2_frm_wd_uv;
357 i2_rec_x = u1_sub_x << 1;
358 i2_rec_y = u1_sub_y << 1;
359
360 i2_frm_x = (u2_mb_x << 3) + i2_rec_x + i2_tmp_mv_x;
361 i2_frm_y = (u2_mb_y << 3) + i2_rec_y + i2_tmp_mv_y;
362
363 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
364 ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
365 i2_frm_y = CLIP3(((1 - u1_dma_ht)), (u2_pic_ht - (1)), i2_frm_y);
366
367 i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
368 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
369
370 /********************************************************************/
371 /* Calulating the horizontal and the vertical u4_ofst from top left */
372 /* edge of the recon buffer */
373 /********************************************************************/
374 /* CHANGED CODE */
375 u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
376 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
377
378 {
379 u2_rec_wd = ps_dec->u2_frm_wd_uv;
380 i2_rec_x += (mb_index << 3);
381 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
382 ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
383 ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
384 + i4_rec_offset;
385 }
386
387 /* CHANGED CODE */
388
389 /* filling the common pred structures for U */
390 u2_frm_wd = ps_dec->u2_frm_wd_uv;
391
392 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
393 ps_pred->i1_dma_ht = u1_dma_ht;
394 ps_pred->i1_mc_wd = i1_mc_wd;
395
396 ps_pred->u2_frm_wd = u2_frm_wd;
397 ps_pred->u2_dst_stride = u2_rec_wd;
398
399 ps_pred->i1_mb_partwidth = u1_part_wd << 1;
400 ps_pred->i1_mb_partheight = u1_part_ht << 1;
401 ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
402
403 pu1_pred_u = ps_ref_frm->pu1_buf2 + i4_ref_offset;
404 pu1_pred_v = ps_ref_frm->pu1_buf3 + i4_ref_offset;
405
406 /* Copy U & V partitions */
407 ps_pred->pu1_u_ref = pu1_pred_u;
408
409 /* Increment the reference buffer Index */
410 ps_pred->pu1_v_ref = pu1_pred_v;
411 }
412
413 /* Increment ps_pred index */
414 ps_dec->u4_pred_info_idx += 2;
415
416 return OK;
417
418 }
419
420
421 /*****************************************************************************/
422 /* \if Function name : formMbPartInfo \endif */
423 /* */
424 /* \brief */
425 /* Form the Mb partition information structure, to be used by the MC */
426 /* routine */
427 /* */
428 /* \return */
429 /* None */
430 /* \note */
431 /* c_bufx is used to select PredBuffer, */
432 /* if it's only Forward/Backward prediction always buffer used is */
433 /* puc_MbLumaPredBuffer[0 to X1],pu1_mb_cb_pred_buffer[0 to X1] and */
434 /* pu1_mb_cr_pred_buffer[0 to X1] */
435 /* */
436 /* if it's bidirect for forward ..PredBuffer[0 to X1] buffer is used and */
437 /* ..PredBuffer[X2 to X3] for backward prediction. and */
438 /* */
439 /* Final predicted samples values are the average of ..PredBuffer[0 to X1]*/
440 /* and ..PredBuffer[X2 to X3] */
441 /* */
442 /* X1 is 255 for Luma and 63 for Chroma */
443 /* X2 is 256 for Luma and 64 for Chroma */
444 /* X3 is 511 for Luma and 127 for Chroma */
445 /* */
446 /* DD MM YYYY Author(s) Changes (Describe the changes made) */
447 /* 11 05 2005 SWRN Modified to handle pod */
448 /*****************************************************************************/
ih264d_form_mb_part_info_mp(pred_info_pkd_t * ps_pred_pkd,dec_struct_t * ps_dec,UWORD16 u2_mb_x,UWORD16 u2_mb_y,WORD32 mb_index,dec_mb_info_t * ps_cur_mb_info)449 WORD32 ih264d_form_mb_part_info_mp(pred_info_pkd_t *ps_pred_pkd,
450 dec_struct_t * ps_dec,
451 UWORD16 u2_mb_x,
452 UWORD16 u2_mb_y,
453 WORD32 mb_index,
454 dec_mb_info_t *ps_cur_mb_info)
455 {
456 /* The reference buffer pointer */
457 UWORD8 *pu1_ref_buf;
458 WORD16 i2_frm_x, i2_frm_y, i2_tmp_mv_x, i2_tmp_mv_y, i2_pod_ht;
459 WORD16 i2_rec_x, i2_rec_y;
460 UWORD16 u2_pic_ht, u2_frm_wd, u2_rec_wd;
461 UWORD8 u1_wght_pred_type, u1_wted_bipred_idc;
462 UWORD16 u2_tot_ref_scratch_size;
463 UWORD8 u1_sub_x = 0;
464 UWORD8 u1_sub_y = 0;
465 UWORD8 u1_is_bi_dir = 0;
466
467 /********************************************/
468 /* i1_mc_wd width reqd for mcomp */
469 /* u1_dma_ht height reqd for mcomp */
470 /* u1_dma_wd width aligned to 4 bytes */
471 /* u1_dx fractional part of width */
472 /* u1_dx fractional part of height */
473 /********************************************/
474 UWORD8 i1_mc_wd, u1_dma_ht, u1_dma_wd, u1_dx, u1_dy;
475 pred_info_t * ps_pred ;
476 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
477 const UWORD8 u1_slice_type = ps_cur_slice->u1_slice_type;
478 UWORD8 u1_pod_bot, u1_pod_top;
479
480 /* load the pictype for pod u4_flag & chroma motion vector derivation */
481 UWORD8 u1_ref_pic_type ;
482
483 /* set default value to flags specifying field nature of picture & mb */
484 UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
485 UWORD32 u1_mb_bot = 0, u1_pic_bot = 0, u1_mb_or_pic_bot;
486 tfr_ctxt_t *ps_frame_buf;
487 /* calculate flags specifying field nature of picture & mb */
488 const UWORD32 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
489 WORD8 i1_pred;
490 WORD8 i1_size_pos_info,i1_buf_id,i1_ref_idx;
491 UWORD8 u1_part_wd,u1_part_ht;
492 WORD16 i2_mv_x,i2_mv_y;
493 struct pic_buffer_t *ps_ref_frm;
494 UWORD32 *pu4_wt_offset;
495 UWORD8 *pu1_buf1,*pu1_buf2,*pu1_buf3;
496
497
498 PROFILE_DISABLE_MB_PART_INFO()
499
500 ps_pred = ps_dec->ps_pred + ps_dec->u4_pred_info_idx;
501
502
503 i1_size_pos_info = ps_pred_pkd->i1_size_pos_info;
504 GET_XPOS_PRED(u1_sub_x,i1_size_pos_info);
505 GET_YPOS_PRED(u1_sub_y,i1_size_pos_info);
506 GET_WIDTH_PRED(u1_part_wd,i1_size_pos_info);
507 GET_HEIGHT_PRED(u1_part_ht,i1_size_pos_info);
508 i2_mv_x = ps_pred_pkd->i2_mv[0];
509 i2_mv_y = ps_pred_pkd->i2_mv[1];
510 i1_ref_idx = ps_pred_pkd->i1_ref_idx_info & 0x3f;
511 i1_buf_id = ps_pred_pkd->i1_buf_id;
512 ps_ref_frm = ps_dec->apv_buf_id_pic_buf_map[i1_buf_id];
513
514 i1_pred = (ps_pred_pkd->i1_ref_idx_info & 0xC0) >> 6;
515 u1_is_bi_dir = (i1_pred == BI_PRED);
516
517
518 u1_ref_pic_type = ps_pred_pkd->u1_pic_type & PIC_MASK;
519
520 pu1_buf1 = ps_ref_frm->pu1_buf1;
521 pu1_buf2 = ps_ref_frm->pu1_buf2;
522 pu1_buf3 = ps_ref_frm->pu1_buf3;
523
524 if(u1_ref_pic_type == BOT_FLD)
525 {
526 pu1_buf1 += ps_ref_frm->u2_frm_wd_y;
527 pu1_buf2 += ps_ref_frm->u2_frm_wd_uv;
528 pu1_buf3 += ps_ref_frm->u2_frm_wd_uv;
529
530 }
531
532
533
534 if(ps_dec->ps_cur_pps->u1_wted_pred_flag)
535 {
536 pu4_wt_offset = (UWORD32*)&ps_dec->pu4_wt_ofsts[2
537 * X3(i1_ref_idx)];
538 }
539
540
541 pu4_wt_offset = ps_pred_pkd->pu4_wt_offst;
542
543
544 /* Pointer to the frame buffer */
545 {
546 ps_frame_buf = &ps_dec->s_tran_addrecon;
547 /* CHANGED CODE */
548 }
549
550 if(!u1_pic_fld)
551 {
552 u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
553 u1_mb_bot = 1 - ps_cur_mb_info->u1_topmb;
554 }
555 else
556 u1_pic_bot = ps_cur_slice->u1_bottom_field_flag;
557
558 /****************************************************************/
559 /* calculating the flags the tell whether to use frame-padding */
560 /* or use software pad-on-demand */
561 /****************************************************************/
562 u1_mb_or_pic_bot = u1_mb_bot | u1_pic_bot;
563 u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
564 u1_pod_bot = u1_mb_or_pic_fld && (u1_ref_pic_type == TOP_FLD);
565 u1_pod_top = u1_mb_or_pic_fld && (u1_ref_pic_type == BOT_FLD);
566
567 /* Weighted Pred additions */
568 u1_wted_bipred_idc = ps_dec->ps_cur_pps->u1_wted_bipred_idc;
569
570 if((u1_slice_type == P_SLICE) || (u1_slice_type == SP_SLICE))
571 {
572 /* P Slice only */
573 u1_wght_pred_type = ps_dec->ps_cur_pps->u1_wted_pred_flag;
574
575 }
576 else
577 {
578 /* B Slice only */
579 u1_wght_pred_type = 1 + u1_is_bi_dir;
580 if(u1_wted_bipred_idc == 0)
581 u1_wght_pred_type = 0;
582 if((u1_wted_bipred_idc == 2) && (!u1_is_bi_dir))
583 u1_wght_pred_type = 0;
584 }
585 /* load the scratch reference buffer index */
586 pu1_ref_buf = ps_dec->pu1_ref_buff + ps_dec->u4_dma_buf_idx;
587 u2_tot_ref_scratch_size = 0;
588
589
590 /* Transfer Setup Y */
591 {
592 UWORD8 *pu1_pred, *pu1_rec;
593 /* calculating rounded motion vectors and fractional components */
594 i2_tmp_mv_x = i2_mv_x;
595 i2_tmp_mv_y = i2_mv_y;
596
597 u1_dx = i2_tmp_mv_x & 0x3;
598 u1_dy = i2_tmp_mv_y & 0x3;
599 i2_tmp_mv_x >>= 2;
600 i2_tmp_mv_y >>= 2;
601 i1_mc_wd = u1_part_wd << 2;
602 u1_dma_ht = u1_part_ht << 2;
603 if(u1_dx)
604 {
605 i2_tmp_mv_x -= 2;
606 i1_mc_wd += 5;
607 }
608 if(u1_dy)
609 {
610 i2_tmp_mv_y -= 2;
611 u1_dma_ht += 5;
612 }
613
614 /********************************************************************/
615 /* Calulating the horizontal and the vertical u4_ofst from top left */
616 /* edge of the reference frame, and subsequent clipping */
617 /********************************************************************/
618 u2_pic_ht = ps_dec->u2_pic_ht >> u1_pic_fld;
619 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_pic_fld;
620 i2_frm_x = (u2_mb_x << 4) + (u1_sub_x << 2) + i2_tmp_mv_x;
621 i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 4)
622 + (((u1_sub_y << 2) + i2_tmp_mv_y) << u1_mb_fld);
623
624 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_X_FRM, (ps_dec->u2_pic_wd - 1),
625 i2_frm_x);
626 i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
627 (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
628
629 pu1_pred = pu1_buf1 + i2_frm_y * u2_frm_wd + i2_frm_x;
630 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
631 /********************************************************************/
632 /* Calulating the horizontal and the vertical u4_ofst from top left */
633 /* edge of the recon buffer */
634 /********************************************************************/
635 /* CHANGED CODE */
636 u2_rec_wd = MB_SIZE;
637 i2_rec_x = u1_sub_x << 2;
638 i2_rec_y = u1_sub_y << 2;
639 {
640 u2_rec_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
641 i2_rec_x += (mb_index << 4);
642 pu1_rec = ps_frame_buf->pu1_dest_y + i2_rec_y * u2_rec_wd
643 + i2_rec_x;
644 if(u1_mb_bot)
645 pu1_rec += ps_dec->u2_frm_wd_y << ((u1_mb_fld) ? 0 : 4);
646 }
647
648 /* CHANGED CODE */
649
650 /* filling the pred and dma structures for Y */
651 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
652
653 ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
654 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
655 ps_pred->u2_frm_wd = u2_frm_wd;
656 ps_pred->i1_dma_ht = u1_dma_ht;
657 ps_pred->i1_mc_wd = i1_mc_wd;
658 ps_pred->pu1_rec_y_u = pu1_rec;
659 ps_pred->u2_dst_stride = u2_rec_wd;
660
661 ps_pred->i1_mb_partwidth = u1_part_wd << 2;
662 ps_pred->i1_mb_partheight = u1_part_ht << 2;
663 ps_pred->u1_dydx = (u1_dy << 2) + u1_dx;
664 ps_pred->u1_is_bi_direct = u1_is_bi_dir;
665 ps_pred->u1_pi1_wt_ofst_rec_v = (UWORD8 *)pu4_wt_offset;
666 ps_pred->u1_wght_pred_type = u1_wght_pred_type;
667 ps_pred->i1_pod_ht = 0;
668
669 /* Increment the Reference buffer Indices */
670 pu1_ref_buf += u1_dma_wd * u1_dma_ht;
671 u2_tot_ref_scratch_size += u1_dma_wd * u1_dma_ht;
672
673 /* unrestricted field motion comp for top region outside frame */
674 i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
675 if((i2_pod_ht > 0) && u1_pod_top)
676 {
677 ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
678 u1_dma_ht -= i2_pod_ht;
679 pu1_pred += i2_pod_ht * u2_frm_wd;
680 }
681 /* unrestricted field motion comp for bottom region outside frame */
682 else if(u1_pod_bot)
683 {
684 i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
685 if(i2_pod_ht > 0)
686 {
687 u1_dma_ht -= i2_pod_ht;
688 ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
689 }
690 }
691
692 /* Copy Y partition */
693
694 /*
695 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
696 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
697 */
698 if(ps_pred->i1_pod_ht)
699 {
700 ps_pred->pu1_pred = pu1_pred;
701 ps_pred->u1_dma_ht_y = u1_dma_ht;
702 ps_pred->u1_dma_wd_y = u1_dma_wd;
703 }
704 ps_pred->pu1_y_ref = pu1_pred;
705 }
706
707
708
709 /* Increment ps_pred index */
710 ps_pred++;
711
712 /* Transfer Setup U & V */
713 {
714 WORD32 i4_ref_offset, i4_rec_offset;
715 UWORD8 *pu1_pred_u, *pu1_pred_v, u1_tmp_dma_ht;
716 /* CHANGED CODE */
717 UWORD8 u1_chroma_cbp = (UWORD8)(ps_cur_mb_info->u1_cbp >> 4);
718 /* CHANGED CODE */
719
720 /* calculating rounded motion vectors and fractional components */
721 i2_tmp_mv_x = i2_mv_x;
722 i2_tmp_mv_y = i2_mv_y;
723
724 /************************************************************************/
725 /* Table 8-9: Derivation of the vertical component of the chroma vector */
726 /* in field coding mode */
727 /************************************************************************/
728 if(u1_pod_bot && u1_mb_or_pic_bot)
729 i2_tmp_mv_y += 2;
730 if(u1_pod_top && !u1_mb_or_pic_bot)
731 i2_tmp_mv_y -= 2;
732
733 /* Eighth sample of the chroma MV */
734 u1_dx = i2_tmp_mv_x & 0x7;
735 u1_dy = i2_tmp_mv_y & 0x7;
736
737 /********************************************************************/
738 /* Calculating the full pel MV for chroma which is 1/2 of the Luma */
739 /* MV in full pel units */
740 /********************************************************************/
741 i2_mv_x = i2_tmp_mv_x;
742 i2_mv_y = i2_tmp_mv_y;
743 i2_tmp_mv_x = SIGN_POW2_DIV(i2_tmp_mv_x, 3);
744 i2_tmp_mv_y = SIGN_POW2_DIV(i2_tmp_mv_y, 3);
745 i1_mc_wd = u1_part_wd << 1;
746 u1_dma_ht = u1_part_ht << 1;
747 if(u1_dx)
748 {
749 if(i2_mv_x < 0)
750 i2_tmp_mv_x -= 1;
751 i1_mc_wd++;
752 }
753 if(u1_dy != 0)
754 {
755 if(i2_mv_y < 0)
756 i2_tmp_mv_y -= 1;
757 u1_dma_ht++;
758 }
759
760 /********************************************************************/
761 /* Calulating the horizontal and the vertical u4_ofst from top left */
762 /* edge of the reference frame, and subsequent clipping */
763 /********************************************************************/
764 u2_pic_ht >>= 1;
765 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_pic_fld;
766 i2_frm_x = (u2_mb_x << 3) + (u1_sub_x << 1) + i2_tmp_mv_x;
767 i2_frm_y = ((u2_mb_y + (u1_mb_bot && !u1_mb_fld)) << 3)
768 + (((u1_sub_y << 1) + i2_tmp_mv_y) << u1_mb_fld);
769
770 i2_frm_x = CLIP3(MAX_OFFSET_OUTSIDE_UV_FRM,
771 ((ps_dec->u2_pic_wd >> 1) - 1), i2_frm_x);
772 i2_frm_y = CLIP3(((1 - u1_dma_ht) << u1_mb_fld),
773 (u2_pic_ht - (1 << u1_mb_fld)), i2_frm_y);
774
775 i4_ref_offset = i2_frm_y * u2_frm_wd + i2_frm_x * YUV420SP_FACTOR;
776 u1_dma_wd = (i1_mc_wd + 3) & 0xFC;
777
778 /********************************************************************/
779 /* Calulating the horizontal and the vertical u4_ofst from top left */
780 /* edge of the recon buffer */
781 /********************************************************************/
782 /* CHANGED CODE */
783 u2_rec_wd = BLK8x8SIZE * YUV420SP_FACTOR;
784 i2_rec_x = u1_sub_x << 1;
785 i2_rec_y = u1_sub_y << 1;
786 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
787 {
788 u2_rec_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
789
790 i2_rec_x += (mb_index << 3);
791 i4_rec_offset = i2_rec_y * u2_rec_wd + i2_rec_x * YUV420SP_FACTOR;
792 if(u1_mb_bot)
793 i4_rec_offset += ps_dec->u2_frm_wd_uv << ((u1_mb_fld) ? 0 : 3);
794 ps_pred->pu1_rec_y_u = ps_frame_buf->pu1_dest_u + i4_rec_offset;
795 ps_pred->u1_pi1_wt_ofst_rec_v = ps_frame_buf->pu1_dest_v
796 + i4_rec_offset;
797
798 }
799
800 /* CHANGED CODE */
801
802 /* filling the common pred structures for U */
803 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
804 u1_tmp_dma_ht = u1_dma_ht;
805 ps_pred->u2_u1_ref_buf_wd = u1_dma_wd;
806 ps_pred->u2_frm_wd = u2_frm_wd;
807 ps_pred->i1_dma_ht = u1_dma_ht;
808 ps_pred->i1_mc_wd = i1_mc_wd;
809 ps_pred->u2_dst_stride = u2_rec_wd;
810
811 ps_pred->i1_mb_partwidth = u1_part_wd << 1;
812 ps_pred->i1_mb_partheight = u1_part_ht << 1;
813 ps_pred->u1_dydx = (u1_dy << 3) + u1_dx;
814 ps_pred->u1_is_bi_direct = u1_is_bi_dir;
815 ps_pred->u1_wght_pred_type = u1_wght_pred_type;
816 ps_pred->i1_pod_ht = 0;
817
818 ps_pred->pu1_dma_dest_addr = pu1_ref_buf;
819
820 /* unrestricted field motion comp for top region outside frame */
821 i2_pod_ht = (-i2_frm_y) >> u1_mb_fld;
822 if((i2_pod_ht > 0) && u1_pod_top)
823 {
824 i4_ref_offset += i2_pod_ht * u2_frm_wd;
825 u1_dma_ht -= i2_pod_ht;
826 ps_pred->i1_pod_ht = (WORD8)(-i2_pod_ht);
827 }
828 /* unrestricted field motion comp for bottom region outside frame */
829 else if(u1_pod_bot)
830 {
831 i2_pod_ht = u1_dma_ht + ((i2_frm_y - u2_pic_ht) >> u1_mb_fld);
832 if(i2_pod_ht > 0)
833 {
834 u1_dma_ht -= i2_pod_ht;
835 ps_pred->i1_pod_ht = (WORD8)i2_pod_ht;
836 }
837 }
838
839 pu1_pred_u = pu1_buf2 + i4_ref_offset;
840 pu1_pred_v = pu1_buf3 + i4_ref_offset;
841
842 /* Copy U & V partitions */
843 if(ps_pred->i1_pod_ht)
844 {
845 ps_pred->pu1_pred_u = pu1_pred_u;
846 ps_pred->u1_dma_ht_uv = u1_dma_ht;
847 ps_pred->u1_dma_wd_uv = u1_dma_wd;
848
849 }
850 ps_pred->pu1_u_ref = pu1_pred_u;
851
852 /* Increment the reference buffer Index */
853 u2_tot_ref_scratch_size += (u1_dma_wd * u1_tmp_dma_ht) << 1;
854
855 if(ps_pred->i1_pod_ht)
856 {
857 ps_pred->pu1_pred_v = pu1_pred_v;
858 ps_pred->u1_dma_ht_uv = u1_dma_ht;
859 ps_pred->u1_dma_wd_uv = u1_dma_wd;
860 }
861
862 ps_pred->pu1_v_ref = pu1_pred_v;
863 }
864
865 /* Increment ps_pred index */
866 ps_dec->u4_pred_info_idx += 2;
867
868
869 /* Increment the reference buffer Index */
870 ps_dec->u4_dma_buf_idx += u2_tot_ref_scratch_size;
871
872 if(ps_dec->u4_dma_buf_idx > MAX_REF_BUF_SIZE)
873 return ERROR_NUM_MV;
874
875 return OK;
876
877
878
879 }
880
881
882 /*!
883 **************************************************************************
884 * \if Function name : MotionCompensate \endif
885 *
886 * \brief
887 * The routine forms predictor blocks for the entire MB and stores it in
888 * predictor buffers.This function works only for BASELINE profile
889 *
890 * \param ps_dec: Pointer to the structure decStruct. This is used to get
891 * pointers to the current and the reference frame and to the MbParams
892 * structure.
893 *
894 * \return
895 * None
896 *
897 * \note
898 * The routine forms predictors for all the luma and the chroma MB
899 * partitions.
900 **************************************************************************
901 */
902
ih264d_motion_compensate_bp(dec_struct_t * ps_dec,dec_mb_info_t * ps_cur_mb_info)903 void ih264d_motion_compensate_bp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
904 {
905 pred_info_t *ps_pred ;
906 UWORD8 *puc_ref, *pu1_dest_y;
907 UWORD8 *pu1_dest_u;
908 UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
909
910 UWORD32 u4_wd_y, u4_ht_y, u4_wd_uv;
911 UWORD32 u4_ht_uv;
912 UWORD8 *puc_pred0 = (UWORD8 *)(ps_dec->pi2_pred1);
913
914
915 PROFILE_DISABLE_INTER_PRED()
916 UNUSED(ps_cur_mb_info);
917 ps_pred = ps_dec->ps_pred ;
918
919 for(u2_num_pels = 0; u2_num_pels < 256;)
920 {
921 UWORD32 uc_dx, uc_dy;
922 /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
923 the MB partition are zero then it would be better to copy the
924 predictor valus directly to the current frame buffer */
925 /*
926 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
927 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
928 */
929
930 u2_ref_wd_y = ps_pred->u2_frm_wd;
931 puc_ref = ps_pred->pu1_y_ref;
932 if(ps_pred->u1_dydx & 0x3)
933 puc_ref += 2;
934 if(ps_pred->u1_dydx >> 2)
935 puc_ref += 2 * u2_ref_wd_y;
936
937 u4_wd_y = ps_pred->i1_mb_partwidth;
938 u4_ht_y = ps_pred->i1_mb_partheight;
939 uc_dx = ps_pred->u1_dydx;
940 uc_dy = uc_dx >> 2;
941 uc_dx &= 0x3;
942
943 pu1_dest_y = ps_pred->pu1_rec_y_u;
944 u2_dst_wd = ps_pred->u2_dst_stride;
945
946 ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
947 u2_ref_wd_y,
948 u2_dst_wd,
949 u4_ht_y,
950 u4_wd_y, puc_pred0,
951 ps_pred->u1_dydx);
952
953 ps_pred++;
954
955 /* Interpolate samples for the chroma components */
956 {
957 UWORD8 *pu1_ref_u;
958
959 u2_ref_wd_uv = ps_pred->u2_frm_wd;
960 pu1_ref_u = ps_pred->pu1_u_ref;
961
962 u4_wd_uv = ps_pred->i1_mb_partwidth;
963 u4_ht_uv = ps_pred->i1_mb_partheight;
964 uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
965 uc_dy = uc_dx >> 3;
966 uc_dx &= 0x7;
967
968 pu1_dest_u = ps_pred->pu1_rec_y_u;
969 u2_dst_wd = ps_pred->u2_dst_stride;
970
971 ps_pred++;
972 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u, u2_ref_wd_uv,
973 u2_dst_wd, uc_dx, uc_dy,
974 u4_ht_uv, u4_wd_uv);
975
976 }
977
978 u2_num_pels += (UWORD8)u4_wd_y * (UWORD8)u4_ht_y;
979
980 }
981 }
982
983
984 /*
985 **************************************************************************
986 * \if Function name : MotionCompensateB \endif
987 *
988 * \brief
989 * The routine forms predictor blocks for the entire MB and stores it in
990 * predictor buffers.
991 *
992 * \param ps_dec: Pointer to the structure decStruct. This is used to get
993 * pointers to the current and the reference frame and to the MbParams
994 * structure.
995 *
996 * \return
997 * None
998 *
999 * \note
1000 * The routine forms predictors for all the luma and the chroma MB
1001 * partitions.
1002 **************************************************************************
1003 */
1004
ih264d_motion_compensate_mp(dec_struct_t * ps_dec,dec_mb_info_t * ps_cur_mb_info)1005 void ih264d_motion_compensate_mp(dec_struct_t * ps_dec, dec_mb_info_t *ps_cur_mb_info)
1006 {
1007 pred_info_t *ps_pred ;
1008 pred_info_t *ps_pred_y_forw, *ps_pred_y_back, *ps_pred_cr_forw;
1009 UWORD8 *puc_ref, *pu1_dest_y, *puc_pred0, *puc_pred1;
1010 UWORD8 *pu1_dest_u, *pu1_dest_v;
1011 WORD16 *pi16_intm;
1012 UWORD32 u2_num_pels, u2_ref_wd_y, u2_ref_wd_uv, u2_dst_wd;
1013 UWORD32 u2_dest_wd_y, u2_dest_wd_uv;
1014 UWORD32 u2_row_buf_wd_y = 0;
1015 UWORD32 u2_row_buf_wd_uv = 0;
1016 UWORD32 u2_log2Y_crwd = ps_dec->ps_cur_slice->u2_log2Y_crwd;
1017 UWORD32 u4_wd_y, u4_ht_y, u1_dir, u4_wd_uv;
1018 UWORD32 u4_ht_uv;
1019 UWORD8 *pu1_temp_mc_buffer = ps_dec->pu1_temp_mc_buffer;
1020 WORD32 i2_pod_ht;
1021 UWORD32 u2_pic_ht, u2_frm_wd, u2_rec_wd;
1022 UWORD32 u1_pod_bot, u1_pod_top;
1023 UWORD8 *pu1_pred, *pu1_dma_dst;
1024 UWORD32 u1_dma_wd, u1_dma_ht;
1025
1026 dec_slice_params_t * const ps_cur_slice = ps_dec->ps_cur_slice;
1027
1028 /* set default value to flags specifying field nature of picture & mb */
1029 UWORD32 u1_mb_fld = 0, u1_mb_or_pic_fld;
1030 UWORD32 u1_mb_or_pic_bot;
1031 /* calculate flags specifying field nature of picture & mb */
1032 const UWORD8 u1_pic_fld = ps_cur_slice->u1_field_pic_flag;
1033
1034 PROFILE_DISABLE_INTER_PRED()
1035 ps_pred = ps_dec->ps_pred ;
1036 /* Initialize both ps_pred_y_forw, ps_pred_cr_forw and ps_pred_y_back
1037 * to avoid static analysis warnings */
1038 ps_pred_y_forw = ps_pred;
1039 ps_pred_y_back = ps_pred;
1040 ps_pred_cr_forw = ps_pred;
1041
1042 if(ps_dec->u1_separate_parse)
1043 u2_log2Y_crwd = ps_dec->ps_decode_cur_slice->u2_log2Y_crwd;
1044
1045 if(!u1_pic_fld)
1046 {
1047 u1_mb_fld = ps_cur_mb_info->u1_mb_field_decodingflag;
1048 }
1049
1050 u1_mb_or_pic_fld = u1_mb_fld | u1_pic_fld;
1051
1052 pi16_intm = ps_dec->pi2_pred1;
1053 puc_pred0 = (UWORD8 *)pi16_intm;
1054 puc_pred1 = puc_pred0 + PRED_BUFFER_WIDTH * PRED_BUFFER_HEIGHT * sizeof(WORD16);
1055
1056 for(u2_num_pels = 0; u2_num_pels < 256;)
1057 {
1058 UWORD8 uc_dx, uc_dy;
1059 const UWORD8 u1_is_bi_direct = ps_pred->u1_is_bi_direct;
1060 for(u1_dir = 0; u1_dir <= u1_is_bi_direct; u1_dir++)
1061 {
1062 /* Pointer to the destination buffer. If the CBPs of all 8x8 blocks in
1063 the MB partition are zero then it would be better to copy the
1064 predictor valus directly to the current frame buffer */
1065 /*
1066 * ps_pred->i1_pod_ht is non zero when MBAFF is present. In case of MBAFF the reference data
1067 * is copied in the Scrath buffer so that the padding_on_demand doesnot corrupt the frame data
1068 */
1069
1070 if(ps_pred->i1_pod_ht)
1071 {
1072 u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
1073 puc_ref = ps_pred->pu1_dma_dest_addr;
1074 }
1075 else
1076 {
1077 u2_ref_wd_y = ps_pred->u2_frm_wd;
1078 puc_ref = ps_pred->pu1_y_ref;
1079
1080 }
1081
1082 if(ps_pred->u1_dydx & 0x3)
1083 puc_ref += 2;
1084 if(ps_pred->u1_dydx >> 2)
1085 puc_ref += 2 * u2_ref_wd_y;
1086 u4_wd_y = ps_pred->i1_mb_partwidth;
1087 u4_ht_y = ps_pred->i1_mb_partheight;
1088
1089 uc_dx = ps_pred->u1_dydx;
1090 uc_dy = uc_dx >> 2;
1091 uc_dx &= 0x3;
1092 if(u1_dir == 0)
1093 {
1094 pu1_dest_y = ps_pred->pu1_rec_y_u;
1095 u2_row_buf_wd_y = ps_pred->u2_dst_stride;
1096 u2_dst_wd = ps_pred->u2_dst_stride;
1097 u2_dest_wd_y = u2_dst_wd;
1098 ps_pred_y_forw = ps_pred;
1099 }
1100 else
1101 {
1102 pu1_dest_y = pu1_temp_mc_buffer;
1103 u2_dst_wd = MB_SIZE;
1104 u2_dest_wd_y = u2_dst_wd;
1105 ps_pred_y_back = ps_pred;
1106 ps_pred_y_back->pu1_rec_y_u = pu1_dest_y;
1107 }
1108
1109 /* padding on demand (POD) for y done here */
1110
1111 if(ps_pred->i1_pod_ht)
1112 {
1113 pu1_pred = ps_pred->pu1_pred;
1114 pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
1115 u1_dma_wd = ps_pred->u1_dma_wd_y;
1116 u1_dma_ht = ps_pred->u1_dma_ht_y;
1117 u2_frm_wd = ps_dec->u2_frm_wd_y << u1_mb_or_pic_fld;
1118 if(ps_pred->i1_pod_ht < 0)
1119 {
1120 pu1_dma_dst = pu1_dma_dst - (ps_pred->i1_pod_ht * ps_pred->u2_u1_ref_buf_wd);
1121 }
1122 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd, u1_dma_wd,
1123 u1_dma_ht);
1124 ih264d_pad_on_demand(ps_pred, LUM_BLK);
1125 }
1126 ps_dec->apf_inter_pred_luma[ps_pred->u1_dydx](puc_ref, pu1_dest_y,
1127 u2_ref_wd_y,
1128 u2_dst_wd,
1129 u4_ht_y,
1130 u4_wd_y,
1131 puc_pred0,
1132 ps_pred->u1_dydx);
1133 ps_pred++;
1134
1135 /* Interpolate samples for the chroma components */
1136 {
1137 UWORD8 *pu1_ref_u;
1138 UWORD32 u1_dma_ht;
1139
1140 /* padding on demand (POD) for U and V done here */
1141 u1_dma_ht = ps_pred->i1_dma_ht;
1142
1143 if(ps_pred->i1_pod_ht)
1144 {
1145 pu1_pred = ps_pred->pu1_pred_u;
1146 pu1_dma_dst = ps_pred->pu1_dma_dest_addr;
1147 u1_dma_ht = ps_pred->u1_dma_ht_uv;
1148 u1_dma_wd = ps_pred->u1_dma_wd_uv * YUV420SP_FACTOR;
1149 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
1150 if(ps_pred->i1_pod_ht < 0)
1151 {
1152 /*Top POD*/
1153 pu1_dma_dst -= (ps_pred->i1_pod_ht
1154 * ps_pred->u2_u1_ref_buf_wd
1155 * YUV420SP_FACTOR);
1156 }
1157
1158 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
1159 u1_dma_wd, u1_dma_ht);
1160
1161 pu1_dma_dst += (ps_pred->i1_dma_ht
1162 * ps_pred->u2_u1_ref_buf_wd);
1163 pu1_pred = ps_pred->pu1_pred_v;
1164
1165 ih264d_pad_on_demand(ps_pred, CHROM_BLK);
1166 }
1167
1168 if(ps_pred->i1_pod_ht)
1169 {
1170 pu1_ref_u = ps_pred->pu1_dma_dest_addr;
1171
1172 u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd
1173 * YUV420SP_FACTOR;
1174 }
1175 else
1176 {
1177 u2_ref_wd_uv = ps_pred->u2_frm_wd;
1178 pu1_ref_u = ps_pred->pu1_u_ref;
1179
1180 }
1181
1182 u4_wd_uv = ps_pred->i1_mb_partwidth;
1183 u4_ht_uv = ps_pred->i1_mb_partheight;
1184 uc_dx = ps_pred->u1_dydx; /* 8*dy + dx */
1185 uc_dy = uc_dx >> 3;
1186 uc_dx &= 0x7;
1187 if(u1_dir == 0)
1188 {
1189 pu1_dest_u = ps_pred->pu1_rec_y_u;
1190
1191 pu1_dest_v = ps_pred->u1_pi1_wt_ofst_rec_v;
1192 u2_row_buf_wd_uv = ps_pred->u2_dst_stride;
1193 u2_dst_wd = ps_pred->u2_dst_stride;
1194 u2_dest_wd_uv = u2_dst_wd;
1195 ps_pred_cr_forw = ps_pred;
1196 }
1197 else
1198 {
1199 pu1_dest_u = puc_pred0;
1200
1201 pu1_dest_v = puc_pred1;
1202 u2_dest_wd_uv = BUFFER_WIDTH;
1203 u2_dst_wd = BUFFER_WIDTH;
1204 ps_pred->pu1_rec_y_u = pu1_dest_u;
1205 ps_pred->u1_pi1_wt_ofst_rec_v = pu1_dest_v;
1206 }
1207
1208 ps_pred++;
1209 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_dest_u,
1210 u2_ref_wd_uv, u2_dst_wd,
1211 uc_dx, uc_dy, u4_ht_uv,
1212 u4_wd_uv);
1213
1214 if(ps_cur_mb_info->u1_Mux == 1)
1215 {
1216 /******************************************************************/
1217 /* padding on demand (POD) for U and V done here */
1218 /* ps_pred now points to the Y entry of the 0,0 component */
1219 /* Y need not be checked for POD because Y lies within */
1220 /* the picture((0,0) mv for Y doesnot get changed. But (0,0) for */
1221 /* U and V can need POD beacause of cross-field mv adjustments */
1222 /* (Table 8-9 of standard) */
1223 /******************************************************************/
1224 if((ps_pred + 1)->i1_pod_ht)
1225 {
1226 pu1_pred = (ps_pred + 1)->pu1_pred_u;
1227 pu1_dma_dst = (ps_pred + 1)->pu1_dma_dest_addr;
1228 u1_dma_ht = (ps_pred + 1)->u1_dma_ht_uv;
1229 u1_dma_wd = (ps_pred + 1)->u1_dma_wd_uv
1230 * YUV420SP_FACTOR;
1231 u2_frm_wd = ps_dec->u2_frm_wd_uv << u1_mb_or_pic_fld;
1232 if((ps_pred + 1)->i1_pod_ht < 0)
1233 {
1234 /*Top POD*/
1235 pu1_dma_dst -= ((ps_pred + 1)->i1_pod_ht
1236 * (ps_pred + 1)->u2_u1_ref_buf_wd
1237 * YUV420SP_FACTOR);
1238 }
1239 ih264d_copy_2d1d(pu1_pred, pu1_dma_dst, u2_frm_wd,
1240 u1_dma_wd, u1_dma_ht);
1241 pu1_dma_dst += ((ps_pred + 1)->i1_dma_ht
1242 * (ps_pred + 1)->u2_u1_ref_buf_wd); //(u1_dma_ht * u1_dma_wd);//
1243 pu1_pred = (ps_pred + 1)->pu1_pred_v;
1244 ih264d_pad_on_demand(ps_pred + 1, CHROM_BLK);
1245
1246 }
1247
1248 ih264d_multiplex_ref_data(ps_dec, ps_pred, pu1_dest_y,
1249 pu1_dest_u, ps_cur_mb_info,
1250 u2_dest_wd_y, u2_dest_wd_uv,
1251 u1_dir);
1252 ps_pred += 2;
1253 }
1254 }
1255 }
1256 if(u1_dir != 0)
1257 u2_ref_wd_y = MB_SIZE;
1258
1259 u2_num_pels += u4_wd_y * u4_ht_y;
1260 /* if BI_DIRECT, average the two pred's, and put in ..PredBuffer[0] */
1261 if((u1_is_bi_direct != 0) || (ps_pred_y_forw->u1_wght_pred_type != 0))
1262 {
1263
1264 switch(ps_pred_y_forw->u1_wght_pred_type)
1265 {
1266 case 0:
1267 ps_dec->pf_default_weighted_pred_luma(
1268 ps_pred_y_forw->pu1_rec_y_u, pu1_dest_y,
1269 ps_pred_y_forw->pu1_rec_y_u,
1270 u2_row_buf_wd_y, u2_ref_wd_y,
1271 u2_row_buf_wd_y, u4_ht_uv * 2,
1272 u4_wd_uv * 2);
1273
1274 ps_dec->pf_default_weighted_pred_chroma(
1275 ps_pred_cr_forw->pu1_rec_y_u, pu1_dest_u,
1276 ps_pred_cr_forw->pu1_rec_y_u,
1277 u2_row_buf_wd_uv, u2_dst_wd,
1278 u2_row_buf_wd_uv, u4_ht_uv,
1279 u4_wd_uv);
1280
1281 break;
1282 case 1:
1283 {
1284 UWORD32 *pu4_weight_ofst =
1285 (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
1286 UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
1287 UWORD32 u4_wt_ofst_y =
1288 (UWORD32)(pu4_weight_ofst[0]);
1289 WORD32 weight = (WORD16)(u4_wt_ofst_y & 0xffff);
1290 WORD32 ofst = (WORD8)(u4_wt_ofst_y >> 16);
1291
1292 ps_dec->pf_weighted_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
1293 ps_pred_y_forw->pu1_rec_y_u,
1294 u2_row_buf_wd_y,
1295 u2_row_buf_wd_y,
1296 (u2_log2Y_crwd & 0x0ff),
1297 weight, ofst, u4_ht_y,
1298 u4_wd_y);
1299
1300 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
1301 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
1302 weight = ((u4_wt_ofst_v & 0xffff) << 16)
1303 | (u4_wt_ofst_u & 0xffff);
1304 ofst = ((u4_wt_ofst_v >> 16) << 8)
1305 | ((u4_wt_ofst_u >> 16) & 0xFF);
1306
1307 ps_dec->pf_weighted_pred_chroma(
1308 ps_pred_cr_forw->pu1_rec_y_u,
1309 ps_pred_cr_forw->pu1_rec_y_u,
1310 u2_row_buf_wd_uv, u2_row_buf_wd_uv,
1311 (u2_log2Y_crwd >> 8), weight, ofst,
1312 u4_ht_y >> 1, u4_wd_y >> 1);
1313 }
1314
1315 break;
1316 case 2:
1317 {
1318 UWORD32 *pu4_weight_ofst =
1319 (UWORD32*)ps_pred_y_forw->u1_pi1_wt_ofst_rec_v;
1320 UWORD32 u4_wt_ofst_u, u4_wt_ofst_v;
1321 UWORD32 u4_wt_ofst_y;
1322 WORD32 weight1, weight2;
1323 WORD32 ofst1, ofst2;
1324
1325 u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[0]);
1326
1327 weight1 = (WORD16)(u4_wt_ofst_y & 0xffff);
1328 ofst1 = (WORD8)(u4_wt_ofst_y >> 16);
1329
1330 u4_wt_ofst_y = (UWORD32)(pu4_weight_ofst[1]);
1331 weight2 = (WORD16)(u4_wt_ofst_y & 0xffff);
1332 ofst2 = (WORD8)(u4_wt_ofst_y >> 16);
1333
1334 ps_dec->pf_weighted_bi_pred_luma(ps_pred_y_forw->pu1_rec_y_u,
1335 ps_pred_y_back->pu1_rec_y_u,
1336 ps_pred_y_forw->pu1_rec_y_u,
1337 u2_row_buf_wd_y,
1338 u2_ref_wd_y,
1339 u2_row_buf_wd_y,
1340 (u2_log2Y_crwd & 0x0ff),
1341 weight1, weight2, ofst1,
1342 ofst2, u4_ht_y,
1343 u4_wd_y);
1344
1345 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[2]);
1346 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[4]);
1347 weight1 = ((u4_wt_ofst_v & 0xffff) << 16)
1348 | (u4_wt_ofst_u & 0xffff);
1349 ofst1 = ((u4_wt_ofst_v >> 16) << 8)
1350 | ((u4_wt_ofst_u >> 16) & 0xFF);
1351
1352 u4_wt_ofst_u = (UWORD32)(pu4_weight_ofst[3]);
1353 u4_wt_ofst_v = (UWORD32)(pu4_weight_ofst[5]);
1354 weight2 = ((u4_wt_ofst_v & 0xffff) << 16)
1355 | (u4_wt_ofst_u & 0xffff);
1356 ofst2 = ((u4_wt_ofst_v >> 16) << 8)
1357 | ((u4_wt_ofst_u >> 16) & 0xFF);
1358
1359 ps_dec->pf_weighted_bi_pred_chroma(
1360 (ps_pred_y_forw + 1)->pu1_rec_y_u,
1361 (ps_pred_y_back + 1)->pu1_rec_y_u,
1362 (ps_pred_y_forw + 1)->pu1_rec_y_u,
1363 u2_row_buf_wd_uv, u2_dst_wd,
1364 u2_row_buf_wd_uv, (u2_log2Y_crwd >> 8),
1365 weight1, weight2, ofst1, ofst2,
1366 u4_ht_y >> 1, u4_wd_y >> 1);
1367 }
1368
1369 break;
1370 }
1371
1372 }
1373 }
1374 }
1375
1376
1377 /*!
1378 **************************************************************************
1379 * \if Function name : ih264d_multiplex_ref_data \endif
1380 *
1381 * \brief
1382 * Initializes forward and backward refernce lists for B slice decoding.
1383 *
1384 *
1385 * \return
1386 * 0 on Success and Error code otherwise
1387 **************************************************************************
1388 */
1389
ih264d_multiplex_ref_data(dec_struct_t * ps_dec,pred_info_t * ps_pred,UWORD8 * pu1_dest_y,UWORD8 * pu1_dest_u,dec_mb_info_t * ps_cur_mb_info,UWORD16 u2_dest_wd_y,UWORD16 u2_dest_wd_uv,UWORD8 u1_dir)1390 void ih264d_multiplex_ref_data(dec_struct_t * ps_dec,
1391 pred_info_t *ps_pred,
1392 UWORD8* pu1_dest_y,
1393 UWORD8* pu1_dest_u,
1394 dec_mb_info_t *ps_cur_mb_info,
1395 UWORD16 u2_dest_wd_y,
1396 UWORD16 u2_dest_wd_uv,
1397 UWORD8 u1_dir)
1398 {
1399 UWORD16 u2_mask = ps_cur_mb_info->u2_mask[u1_dir];
1400 UWORD8 *pu1_ref_y, *pu1_ref_u;
1401 UWORD8 uc_cond, i, j, u1_dydx;
1402 UWORD16 u2_ref_wd_y, u2_ref_wd_uv;
1403
1404 PROFILE_DISABLE_INTER_PRED()
1405
1406 if(ps_pred->i1_pod_ht)
1407 {
1408 pu1_ref_y = ps_pred->pu1_dma_dest_addr;
1409
1410 u2_ref_wd_y = ps_pred->u2_u1_ref_buf_wd;
1411 }
1412 else
1413 {
1414 pu1_ref_y = ps_pred->pu1_y_ref;
1415 u2_ref_wd_y = ps_pred->u2_frm_wd;
1416 }
1417
1418 ps_pred++;
1419 if(ps_pred->i1_pod_ht)
1420 {
1421 pu1_ref_u = ps_pred->pu1_dma_dest_addr;
1422 u2_ref_wd_uv = ps_pred->u2_u1_ref_buf_wd * YUV420SP_FACTOR;
1423
1424 }
1425 else
1426 {
1427 pu1_ref_u = ps_pred->pu1_u_ref;
1428 u2_ref_wd_uv = ps_pred->u2_frm_wd;
1429
1430 }
1431
1432 u1_dydx = ps_pred->u1_dydx;
1433
1434 {
1435 UWORD8 uc_dx, uc_dy;
1436 UWORD8 *pu1_scratch_u;
1437
1438 uc_dx = u1_dydx & 0x3;
1439 uc_dy = u1_dydx >> 3;
1440 if(u1_dydx != 0)
1441 {
1442 pred_info_t * ps_prv_pred = ps_pred - 2;
1443 pu1_scratch_u = ps_prv_pred->pu1_dma_dest_addr;
1444 ps_dec->pf_inter_pred_chroma(pu1_ref_u, pu1_scratch_u,
1445 u2_ref_wd_uv, 16, uc_dx, uc_dy, 8,
1446 8);
1447
1448 /* Modify ref pointer and refWidth to point to scratch */
1449 /* buffer to be used below in ih264d_copy_multiplex_data functions */
1450 /* CHANGED CODE */
1451 pu1_ref_u = pu1_scratch_u;
1452 u2_ref_wd_uv = 8 * YUV420SP_FACTOR;
1453 }
1454 }
1455 {
1456 for(i = 0; i < 4; i++)
1457 {
1458 for(j = 0; j < 4; j++)
1459 {
1460 uc_cond = u2_mask & 1;
1461 u2_mask >>= 1;
1462 if(uc_cond)
1463 {
1464 *(UWORD32 *)(pu1_dest_y + u2_dest_wd_y) =
1465 *(UWORD32 *)(pu1_ref_y + u2_ref_wd_y);
1466 *(UWORD32 *)(pu1_dest_y + 2 * u2_dest_wd_y) =
1467 *(UWORD32 *)(pu1_ref_y + 2 * u2_ref_wd_y);
1468 *(UWORD32 *)(pu1_dest_y + 3 * u2_dest_wd_y) =
1469 *(UWORD32 *)(pu1_ref_y + 3 * u2_ref_wd_y);
1470 {
1471 UWORD32 *dst, *src;
1472 dst = (UWORD32 *)pu1_dest_y;
1473 src = (UWORD32 *)pu1_ref_y;
1474 *dst = *src;
1475 dst++;
1476 src++;
1477 pu1_dest_y = (UWORD8 *)dst;
1478 pu1_ref_y = (UWORD8 *)src;
1479 }
1480 *(UWORD32 *)(pu1_dest_u + u2_dest_wd_uv) =
1481 *(UWORD32 *)(pu1_ref_u + u2_ref_wd_uv);
1482 {
1483 UWORD32 *dst, *src;
1484 dst = (UWORD32 *)pu1_dest_u;
1485 src = (UWORD32 *)pu1_ref_u;
1486 *dst = *src;
1487 dst++;
1488 src++;
1489 pu1_dest_u = (UWORD8 *)dst;
1490 pu1_ref_u = (UWORD8 *)src;
1491 }
1492
1493 }
1494 else
1495 {
1496 pu1_dest_y += 4;
1497 pu1_ref_y += 4;
1498 pu1_dest_u += 2 * YUV420SP_FACTOR;
1499 pu1_ref_u += 2 * YUV420SP_FACTOR;
1500 }
1501 }
1502 pu1_ref_y += 4 * (u2_ref_wd_y - 4);
1503 pu1_ref_u += 2 * (u2_ref_wd_uv - 4 * YUV420SP_FACTOR);
1504 pu1_dest_y += 4 * (u2_dest_wd_y - 4);
1505 pu1_dest_u += 2 * (u2_dest_wd_uv - 4 * YUV420SP_FACTOR);
1506 }
1507 }
1508 }
1509
/*!
 **************************************************************************
 * \if Function name : ih264d_pad_on_demand \endif
 *
 * \brief
 *    Pads the reference block held in ps_pred->pu1_dma_dest_addr by
 *    replicating one row of it into |i1_pod_ht| adjacent rows.
 *
 * \param ps_pred: Prediction entry. Fields read: pu1_dma_dest_addr,
 *    u2_u1_ref_buf_wd, i1_dma_ht and i1_pod_ht.
 * \param lum_chrom_blk: CHROM_BLK scales the row width by
 *    YUV420SP_FACTOR; any other value uses the luma row width.
 *
 * \return
 *    None
 *
 * \note
 *    i1_pod_ht < 0 (top POD): row |i1_pod_ht| is copied into rows
 *    0 .. |i1_pod_ht| - 1.
 *    i1_pod_ht > 0 (bottom POD): row (i1_dma_ht - 1 - i1_pod_ht) is copied
 *    into the last i1_pod_ht rows of the i1_dma_ht row buffer.
 *    A row is (u2_u1_ref_buf_wd >> 2) 32-bit words wide.
 **************************************************************************
 */
void ih264d_pad_on_demand(pred_info_t *ps_pred, UWORD8 lum_chrom_blk)
{
    UWORD8 *pu1_buf = ps_pred->pu1_dma_dest_addr;
    const UWORD8 *pu1_src_row;
    UWORD8 *pu1_dst_row;
    WORD32 i4_row_words = ps_pred->u2_u1_ref_buf_wd >> 2;
    WORD32 i4_row_bytes;
    WORD32 i4_dma_ht = ps_pred->i1_dma_ht;
    WORD32 i4_pod_ht = ps_pred->i1_pod_ht;
    WORD32 i4_row;

    if(CHROM_BLK == lum_chrom_blk)
    {
        /* chroma rows hold interleaved U and V samples */
        i4_row_words *= YUV420SP_FACTOR;
    }
    i4_row_bytes = i4_row_words * (WORD32)sizeof(UWORD32);

    /* nothing to replicate */
    if((0 == i4_pod_ht) || (i4_row_bytes <= 0))
        return;

    if(i4_pod_ht < 0)
    {
        /* Top POD */
        i4_pod_ht = -i4_pod_ht;
        pu1_src_row = pu1_buf + i4_pod_ht * i4_row_bytes;
        pu1_dst_row = pu1_buf;
    }
    else
    {
        /* Bottom POD */
        pu1_src_row = pu1_buf + (i4_dma_ht - 1 - i4_pod_ht) * i4_row_bytes;
        pu1_dst_row = pu1_buf + (i4_dma_ht - i4_pod_ht) * i4_row_bytes;
    }

    /* The source row never lies inside the destination rows, so a plain */
    /* memcpy per row is sufficient.                                     */
    for(i4_row = 0; i4_row < i4_pod_ht; i4_row++)
    {
        memcpy(pu1_dst_row, pu1_src_row, (size_t)i4_row_bytes);
        pu1_dst_row += i4_row_bytes;
    }
}
1575
1576