1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22  *******************************************************************************
23  * @file
24  *  ih264e_mc.c
25  *
26  * @brief
27  *  Contains definition of functions for motion compensation
28  *
29  * @author
30  *  ittiam
31  *
32  * @par List of Functions:
33  *  - ih264e_motion_comp_luma()
34  *  - ih264e_motion_comp_chroma()
35  *
36  * @remarks
37  *  None
38  *
39  *******************************************************************************
40  */
41 
42 /*****************************************************************************/
43 /* File Includes                                                             */
44 /*****************************************************************************/
45 
46 /* System include files */
47 #include <stdio.h>
48 
49 /* User include files */
50 #include "ih264_typedefs.h"
51 #include "ih264_defs.h"
52 #include "iv2.h"
53 #include "ive2.h"
54 #include "ime_distortion_metrics.h"
55 #include "ime_defs.h"
56 #include "ime_structs.h"
57 #include "ih264_structs.h"
58 #include "ih264_inter_pred_filters.h"
59 #include "ih264_mem_fns.h"
60 #include "ih264_padding.h"
61 #include "ih264_intra_pred_filters.h"
62 #include "ih264_deblk_edge_filters.h"
63 #include "ih264_trans_quant_itrans_iquant.h"
64 #include "ih264_cabac_tables.h"
65 #include "ih264e_defs.h"
66 #include "ih264e_error.h"
67 #include "ih264e_bitstream.h"
68 #include "irc_cntrl_param.h"
69 #include "irc_frame_info_collector.h"
70 #include "ih264e_rate_control.h"
71 #include "ih264e_cabac_structs.h"
72 #include "ih264e_structs.h"
73 #include "ih264e_mc.h"
74 #include "ih264e_half_pel.h"
75 
76 /*****************************************************************************/
77 /* Function Definitions                                                      */
78 /*****************************************************************************/
79 
80 /**
81  ******************************************************************************
82  *
83  * @brief
84  *  performs motion compensation for a luma mb for the given mv.
85  *
86  * @par Description
87  *  This routine performs motion compensation of an inter mb. When the inter
88  *  mb mode is P16x16, there is no need to copy 16x16 unit from reference buffer
89  *  to pred buffer. In this case the function returns pointer and stride of the
90  *  ref. buffer and this info is used in place of pred buffer else where.
91  *  In other cases, the pred buffer is populated via copy / filtering + copy
92  *  (q pel cases) and returned.
93  *
94  * @param[in] ps_proc
95  *  pointer to current proc ctxt
96  *
97  * @param[out] pu1_pseudo_pred
98  *  pseudo prediction buffer
99  *
100  * @param[out] u4_pseudo_pred_strd
101  *  pseudo pred buffer stride
102  *
103  * @return  none
104  *
105  * @remarks Assumes half pel buffers for the entire frame are populated.
106  *
107  ******************************************************************************
108  */
ih264e_motion_comp_luma(process_ctxt_t * ps_proc,UWORD8 ** pu1_pseudo_pred,WORD32 * pi4_pseudo_pred_strd)109 void ih264e_motion_comp_luma(process_ctxt_t *ps_proc, UWORD8 **pu1_pseudo_pred,
110                              WORD32 *pi4_pseudo_pred_strd)
111 {
112     /* codec context */
113     codec_t *ps_codec = ps_proc->ps_codec;
114 
115     /* me ctxt */
116     me_ctxt_t *ps_me_ctxt = &ps_proc->s_me_ctxt;
117 
118     /* Pointer to the structure having motion vectors, size and position of curr partitions */
119     enc_pu_t *ps_curr_pu;
120 
121     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
122     UWORD8 *pu1_ref[4];
123 
124     /* pred buffer ptr */
125     UWORD8 *pu1_pred;
126 
127     /* strides of full pel, half pel x, half pel y, half pel xy reference buffer */
128     WORD32 i4_ref_strd[4];
129 
130     /* pred buffer stride */
131     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
132 
133     /* full pel motion vectors */
134     WORD32 u4_mv_x_full, u4_mv_y_full;
135 
136     /* half pel motion vectors */
137     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
138 
139     /* quarter pel motion vectors */
140     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
141 
142     /* width & height of the partition */
143     UWORD32 wd, ht;
144 
145     /* partition idx */
146     UWORD32 u4_num_prtn;
147 
148     /* half / qpel coefficient */
149     UWORD32 u4_subpel_factor;
150 
151     /* BIPRED Flag */
152     WORD32 i4_bipred_flag;
153 
154     /* temp var */
155     UWORD32 u4_lkup_idx1;
156 
157     /* Init */
158     i4_ref_strd[0] = ps_proc->i4_rec_strd;
159 
160     i4_ref_strd[1] = i4_ref_strd[2] = i4_ref_strd[3] =
161                     ps_me_ctxt->u4_subpel_buf_strd;
162 
163     for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
164                     u4_num_prtn++)
165     {
166         mv_t *ps_curr_mv;
167 
168         /* update ptr to curr partition */
169         ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
170 
171         /* Set no no bipred */
172         i4_bipred_flag = 0;
173 
174         switch (ps_curr_pu->b2_pred_mode)
175         {
176             case PRED_L0:
177                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
178                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
179                 break;
180 
181             case PRED_L1:
182                 ps_curr_mv = &ps_curr_pu->s_me_info[1].s_mv;
183                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[1];
184                 break;
185 
186             case PRED_BI:
187                 /*
188                  * In case of PRED_BI, we only need to ensure that
189                  * the reference buffer that gets selected is
190                  * ps_proc->pu1_best_subpel_buf
191                  */
192 
193                 /* Dummy */
194                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
195                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
196 
197                 i4_bipred_flag = 1;
198                 break;
199 
200             default:
201                 ps_curr_mv = &ps_curr_pu->s_me_info[0].s_mv;
202                 pu1_ref[0] = ps_proc->apu1_ref_buf_luma[0];
203                 break;
204 
205         }
206 
207         /* get full pel mv's (full pel units) */
208         u4_mv_x_full = ps_curr_mv->i2_mvx >> 2;
209         u4_mv_y_full = ps_curr_mv->i2_mvy >> 2;
210 
211         /* get half pel mv's */
212         u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
213         u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
214 
215         /* get quarter pel mv's */
216         u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
217         u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
218 
219         /* width and height of partition */
220         wd = (ps_curr_pu->b4_wd + 1) << 2;
221         ht = (ps_curr_pu->b4_ht + 1) << 2;
222 
223         /* decision ? qpel/hpel, fpel */
224         u4_subpel_factor = (u4_mv_y_hpel << 3) + (u4_mv_x_hpel << 2)
225                         + (u4_mv_y_qpel << 1) + (u4_mv_x_qpel);
226 
227         /* Move ref to position given by MV */
228         pu1_ref[0] += ((u4_mv_y_full * i4_ref_strd[0]) + u4_mv_x_full);
229 
230         /* Sub pel ptrs/ Biperd pointers init */
231         pu1_ref[1] = ps_proc->pu1_best_subpel_buf;
232         i4_ref_strd[1] = ps_proc->u4_bst_spel_buf_strd;
233 
234         /* update pred buff ptr */
235         pu1_pred = ps_proc->pu1_pred_mb
236                         + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
237                         + 4 * ps_curr_pu->b4_pos_x;
238 
239         /* u4_lkup_idx1 will be non zero for half pel and bipred */
240         u4_lkup_idx1 = ((u4_subpel_factor >> 2) != 0) || i4_bipred_flag;
241 
242         {
243             /********************************************************************/
244             /* if the block is P16x16 MB and mv are not quarter pel motion      */
245             /* vectors, there is no need to copy 16x16 unit from reference frame*/
246             /* to pred buffer. We might as well send the reference frame buffer */
247             /* pointer as pred buffer (ofc with updated stride) to fwd transform*/
248             /* and inverse transform unit.                                      */
249             /********************************************************************/
250             if (ps_proc->u4_num_sub_partitions == 1)
251             {
252                 *pu1_pseudo_pred = pu1_ref[u4_lkup_idx1];
253                 *pi4_pseudo_pred_strd = i4_ref_strd[u4_lkup_idx1];
254 
255             }
256             /*
257              * Copying half pel or full pel to prediction buffer
258              * Currently ps_proc->u4_num_sub_partitions will always be 1 as we only support 16x16 in P mbs
259              */
260             else
261             {
262                 ps_codec->pf_inter_pred_luma_copy(pu1_ref[u4_lkup_idx1],
263                                                   pu1_pred,
264                                                   i4_ref_strd[u4_lkup_idx1],
265                                                   i4_pred_strd, ht, wd, NULL,
266                                                   0);
267             }
268 
269         }
270     }
271 }
272 
273 /**
274  ******************************************************************************
275  *
276  * @brief
277  *  performs motion compensation for chroma mb
278  *
279  * @par   Description
280  *  Copies a MB of data from the reference buffer (Full pel, half pel or q pel)
281  *  according to the motion vectors given
282  *
283  * @param[in] ps_proc
284  *  pointer to current proc ctxt
285  *
286  * @return  none
287  *
288  * @remarks Assumes half pel and quarter pel buffers for the entire frame are
289  *  populated.
290  ******************************************************************************
291  */
ih264e_motion_comp_chroma(process_ctxt_t * ps_proc)292 void ih264e_motion_comp_chroma(process_ctxt_t *ps_proc)
293 {
294     /* codec context */
295     codec_t *ps_codec = ps_proc->ps_codec;
296 
297     /* Pointer to the structure having motion vectors, size and position of curr partitions */
298     enc_pu_t *ps_curr_pu;
299 
300     /* pointers to full pel, half pel x, half pel y, half pel xy reference buffer */
301     UWORD8 *pu1_ref;
302 
303     /* pred buffer ptr */
304     UWORD8 *pu1_pred;
305 
306     /* strides of full pel reference buffer */
307     WORD32 i4_ref_strd = ps_proc->i4_rec_strd;
308 
309     /* pred buffer stride */
310     WORD32 i4_pred_strd = ps_proc->i4_pred_strd;
311 
312     /* full pel motion vectors */
313     WORD32 u4_mv_x_full, u4_mv_y_full;
314 
315     /* half pel motion vectors */
316     WORD32 u4_mv_x_hpel, u4_mv_y_hpel;
317 
318     /* quarter pel motion vectors */
319     WORD32 u4_mv_x_qpel, u4_mv_y_qpel;
320 
321     /* width & height of the partition */
322     UWORD32 wd, ht;
323 
324     /* partition idx */
325     UWORD32 u4_num_prtn;
326 
327     WORD32 u4_mv_x;
328     WORD32 u4_mv_y;
329     UWORD8 u1_dx, u1_dy;
330 
331     for (u4_num_prtn = 0; u4_num_prtn < ps_proc->u4_num_sub_partitions;
332                     u4_num_prtn++)
333     {
334         mv_t *ps_curr_mv;
335 
336         ps_curr_pu = ps_proc->ps_pu + u4_num_prtn;
337 
338         if (ps_curr_pu->b2_pred_mode != PRED_BI)
339         {
340             ps_curr_mv = &ps_curr_pu->s_me_info[ps_curr_pu->b2_pred_mode].s_mv;
341             pu1_ref = ps_proc->apu1_ref_buf_chroma[ps_curr_pu->b2_pred_mode];
342 
343             u4_mv_x = ps_curr_mv->i2_mvx >> 3;
344             u4_mv_y = ps_curr_mv->i2_mvy >> 3;
345 
346             /*  corresponds to full pel motion vector in luma, but in chroma corresponds to pel formed wiith dx, dy =4 */
347             u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
348             u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
349 
350             /* get half pel mv's */
351             u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
352             u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
353 
354             /* get quarter pel mv's */
355             u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
356             u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
357 
358             /* width and height of sub macro block */
359             wd = (ps_curr_pu->b4_wd + 1) << 1;
360             ht = (ps_curr_pu->b4_ht + 1) << 1;
361 
362             /* move the pointers so that they point to the motion compensated locations */
363             pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
364 
365             pu1_pred = ps_proc->pu1_pred_mb
366                             + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
367                             + 2 * ps_curr_pu->b4_pos_x;
368 
369             u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1) + (u4_mv_x_qpel);
370             u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1) + (u4_mv_y_qpel);
371 
372             /* cases where u1_dx = 0 or u1_dy = 0 are dealt separately in neon with
373              * separate functions for better performance
374              *
375              * ih264_inter_pred_chroma_dx_zero_a9q
376              * and
377              * ih264_inter_pred_chroma_dy_zero_a9q
378              */
379 
380             ps_codec->pf_inter_pred_chroma(pu1_ref, pu1_pred, i4_ref_strd,
381                                            i4_pred_strd, u1_dx, u1_dy, ht, wd);
382         }
383         else /* If the pred mode is PRED_BI */
384         {
385             /*
386              * We need to interpolate the L0 and L1 ref pics with the chorma MV
387              * then use them to average for bilinrar interpred
388              */
389             WORD32 i4_predmode;
390             UWORD8 *pu1_ref_buf[2];
391 
392             /* Temporary buffers to store the interpolated value from L0 and L1 */
393             pu1_ref_buf[PRED_L0] = ps_proc->apu1_subpel_buffs[0];
394             pu1_ref_buf[PRED_L1] = ps_proc->apu1_subpel_buffs[1];
395 
396 
397             for (i4_predmode = 0; i4_predmode < PRED_BI; i4_predmode++)
398             {
399                 ps_curr_mv = &ps_curr_pu->s_me_info[i4_predmode].s_mv;
400                 pu1_ref = ps_proc->apu1_ref_buf_chroma[i4_predmode];
401 
402                 u4_mv_x = ps_curr_mv->i2_mvx >> 3;
403                 u4_mv_y = ps_curr_mv->i2_mvy >> 3;
404 
405                 /*
406                  * corresponds to full pel motion vector in luma, but in chroma
407                  * corresponds to pel formed wiith dx, dy =4
408                  */
409                 u4_mv_x_full = (ps_curr_mv->i2_mvx & 0x4) >> 2;
410                 u4_mv_y_full = (ps_curr_mv->i2_mvy & 0x4) >> 2;
411 
412                 /* get half pel mv's */
413                 u4_mv_x_hpel = (ps_curr_mv->i2_mvx & 0x2) >> 1;
414                 u4_mv_y_hpel = (ps_curr_mv->i2_mvy & 0x2) >> 1;
415 
416                 /* get quarter pel mv's */
417                 u4_mv_x_qpel = (ps_curr_mv->i2_mvx & 0x1);
418                 u4_mv_y_qpel = (ps_curr_mv->i2_mvy & 0x1);
419 
420                 /* width and height of sub macro block */
421                 wd = (ps_curr_pu->b4_wd + 1) << 1;
422                 ht = (ps_curr_pu->b4_ht + 1) << 1;
423 
424                 /* move the pointers so that they point to the motion compensated locations */
425                 pu1_ref += ((u4_mv_y * i4_ref_strd) + (u4_mv_x << 1));
426 
427                 pu1_pred = ps_proc->pu1_pred_mb
428                                 + 4 * ps_curr_pu->b4_pos_y * i4_pred_strd
429                                 + 2 * ps_curr_pu->b4_pos_x;
430 
431                 u1_dx = (u4_mv_x_full << 2) + (u4_mv_x_hpel << 1)
432                                 + (u4_mv_x_qpel);
433                 u1_dy = (u4_mv_y_full << 2) + (u4_mv_y_hpel << 1)
434                                 + (u4_mv_y_qpel);
435 
436                 ps_codec->pf_inter_pred_chroma(pu1_ref,
437                                                pu1_ref_buf[i4_predmode],
438                                                i4_ref_strd, MB_SIZE, u1_dx,
439                                                u1_dy, ht, wd);
440             }
441 
442             ps_codec->pf_inter_pred_luma_bilinear(pu1_ref_buf[PRED_L0],
443                                                   pu1_ref_buf[PRED_L1], pu1_pred,
444                                                   MB_SIZE, MB_SIZE,
445                                                   i4_pred_strd, MB_SIZE >> 1,
446                                                   MB_SIZE);
447         }
448     }
449 }
450