1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*****************************************************************************/
22 /* File Includes                                                             */
23 /*****************************************************************************/
24 /* System include files */
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <stdarg.h>
30 #include <math.h>
31 #include <limits.h>
32 
33 /* User include files */
34 #include "ihevc_typedefs.h"
35 #include "itt_video_api.h"
36 #include "ihevce_api.h"
37 
38 #include "rc_cntrl_param.h"
39 #include "rc_frame_info_collector.h"
40 #include "rc_look_ahead_params.h"
41 
42 #include "ihevc_defs.h"
43 #include "ihevc_structs.h"
44 #include "ihevc_platform_macros.h"
45 #include "ihevc_deblk.h"
46 #include "ihevc_itrans_recon.h"
47 #include "ihevc_chroma_itrans_recon.h"
48 #include "ihevc_chroma_intra_pred.h"
49 #include "ihevc_intra_pred.h"
50 #include "ihevc_inter_pred.h"
51 #include "ihevc_mem_fns.h"
52 #include "ihevc_padding.h"
53 #include "ihevc_weighted_pred.h"
54 #include "ihevc_sao.h"
55 #include "ihevc_resi_trans.h"
56 #include "ihevc_quant_iquant_ssd.h"
57 #include "ihevc_cabac_tables.h"
58 
59 #include "ihevce_defs.h"
60 #include "ihevce_lap_enc_structs.h"
61 #include "ihevce_multi_thrd_structs.h"
62 #include "ihevce_multi_thrd_funcs.h"
63 #include "ihevce_me_common_defs.h"
64 #include "ihevce_had_satd.h"
65 #include "ihevce_error_codes.h"
66 #include "ihevce_bitstream.h"
67 #include "ihevce_cabac.h"
68 #include "ihevce_rdoq_macros.h"
69 #include "ihevce_function_selector.h"
70 #include "ihevce_enc_structs.h"
71 #include "ihevce_entropy_structs.h"
72 #include "ihevce_cmn_utils_instr_set_router.h"
73 #include "ihevce_enc_loop_structs.h"
74 #include "ihevce_inter_pred.h"
75 #include "ihevce_global_tables.h"
76 #include "ihevce_dep_mngr_interface.h"
77 #include "hme_datatype.h"
78 #include "hme_interface.h"
79 #include "hme_common_defs.h"
80 #include "hme_defs.h"
81 #include "ihevce_me_instr_set_router.h"
82 #include "hme_globals.h"
83 #include "hme_utils.h"
84 #include "hme_coarse.h"
85 #include "hme_fullpel.h"
86 #include "hme_subpel.h"
87 #include "hme_refine.h"
88 #include "hme_err_compute.h"
89 #include "hme_common_utils.h"
90 #include "hme_search_algo.h"
91 #include "ihevce_stasino_helpers.h"
92 #include "ihevce_common_utils.h"
93 
94 /*****************************************************************************/
95 /* Macros                                                                    */
96 /*****************************************************************************/
97 #define UNI_SATD_SCALE 1
98 
99 /*****************************************************************************/
100 /* Function Definitions                                                      */
101 /*****************************************************************************/
/**
********************************************************************************
*  @fn     ihevce_open_loop_pred_data
*
*  @brief  Builds a PU from the best L0 and/or L1 result of the 2Nx2N entry
*          and runs luma inter prediction for it
*
*  @param[in,out] ps_ctxt : ME frame context; its s_mc_ctxt is handed to the
*                 inter prediction routine and its i4_count is incremented
*
*  @param[in] ps_pu_results : per direction PU results of the block
*
*  @param[in] pu1_src : not used by this function
*
*  @param[in] pu1_temp_pred : buffer passed to ihevce_luma_inter_pred_pu
*
*  @param[in] stride : stride passed to ihevce_luma_inter_pred_pu
*
*  @param[in] src_strd : not used by this function
*
*  @param[in] e_part_id : index used to look up how many L0 / L1 results exist
*
*  @return None
********************************************************************************
*/
void ihevce_open_loop_pred_data(
    me_frm_ctxt_t *ps_ctxt,
    inter_pu_results_t *ps_pu_results,
    U08 *pu1_src,
    U08 *pu1_temp_pred,
    S32 stride,
    S32 src_strd,
    UWORD8 e_part_id)
{
    inter_pred_me_ctxt_t *ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
    /* -1 marks "no result available in this direction" */
    S32 i4_sad_l0 = -1;
    S32 i4_sad_l1 = -1;
    S32 i4_status;
    pu_t s_pu;

    ps_ctxt->i4_count++;

    /* Seed the PU from the best L0 result, if there is one */
    if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
    {
        pu_result_t *ps_l0 = ps_pu_results->aps_pu_results[0][PRT_2Nx2N];

        /* Distortion only: total cost with the mv cost removed */
        i4_sad_l0 = ps_l0->i4_tot_cost - ps_l0->i4_mv_cost;

        s_pu.b1_intra_flag = 0;
        s_pu.b2_pred_mode = PRED_L0;
        s_pu.b4_wd = ps_l0->pu.b4_wd;
        s_pu.b4_ht = ps_l0->pu.b4_ht;
        s_pu.b4_pos_x = ps_l0->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l0->pu.b4_pos_y;
        s_pu.mv.i1_l0_ref_idx = ps_l0->pu.mv.i1_l0_ref_idx;
        s_pu.mv.s_l0_mv.i2_mvx = ps_l0->pu.mv.s_l0_mv.i2_mvx;
        s_pu.mv.s_l0_mv.i2_mvy = ps_l0->pu.mv.s_l0_mv.i2_mvy;
    }

    /* Same for L1; geometry and pred mode are overwritten, L0 mv is kept */
    if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
    {
        pu_result_t *ps_l1 = ps_pu_results->aps_pu_results[1][PRT_2Nx2N];

        i4_sad_l1 = ps_l1->i4_tot_cost - ps_l1->i4_mv_cost;

        s_pu.b1_intra_flag = 0;
        s_pu.b2_pred_mode = PRED_L1;
        s_pu.b4_wd = ps_l1->pu.b4_wd;
        s_pu.b4_ht = ps_l1->pu.b4_ht;
        s_pu.b4_pos_x = ps_l1->pu.b4_pos_x;
        s_pu.b4_pos_y = ps_l1->pu.b4_pos_y;
        s_pu.mv.i1_l1_ref_idx = ps_l1->pu.mv.i1_l1_ref_idx;
        s_pu.mv.s_l1_mv.i2_mvx = ps_l1->pu.mv.s_l1_mv.i2_mvx;
        s_pu.mv.s_l1_mv.i2_mvy = ps_l1->pu.mv.s_l1_mv.i2_mvy;
    }

    /* At least one direction must have produced a result */
    ASSERT((i4_sad_l0 != -1) || (i4_sad_l1 != -1));

    /* Both directions available: choose between BI, L0 and L1 */
    if((i4_sad_l0 != -1) && (i4_sad_l1 != -1))
    {
        S32 i4_sad_gap = abs(i4_sad_l0 - i4_sad_l1);

        /* BI is used only when the two SADs are within 15% of each other */
        if((i4_sad_gap < (i4_sad_l0 * 0.15)) && (i4_sad_gap < (i4_sad_l1 * 0.15)))
        {
            s_pu.b2_pred_mode = PRED_BI;
        }
        else
        {
            /* Otherwise the direction with the strictly smaller SAD wins; */
            /* ties go to L1                                                */
            s_pu.b2_pred_mode = (i4_sad_l0 < i4_sad_l1) ? PRED_L0 : PRED_L1;
        }
    }

    i4_status = ihevce_luma_inter_pred_pu(ps_mc_ctxt, &s_pu, pu1_temp_pred, stride, 1);
    if(-1 == i4_status)
    {
        ASSERT(0);
    }
}
179 
180 /**
181 ********************************************************************************
182 *  @fn     void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
183 *
184 *  @brief  Allocates a block of size = i4_size from working memory and returns
185 *
186 *  @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
187 *
188 *  @param[in]  i4_size : size required
189 *
190 *  @return void pointer to allocated memory, NULL if failure
191 ********************************************************************************
192 */
hme_get_wkg_mem(buf_mgr_t * ps_buf_mgr,S32 i4_size)193 void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
194 {
195     U08 *pu1_mem;
196 
197     if(ps_buf_mgr->i4_used + i4_size > ps_buf_mgr->i4_total)
198         return NULL;
199 
200     pu1_mem = ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used;
201     ps_buf_mgr->i4_used += i4_size;
202 
203     return ((void *)pu1_mem);
204 }
205 
206 /**
207 ********************************************************************************
208 *  @fn     hme_init_histogram(
209 *
210 *  @brief  Top level entry point for Coarse ME. Runs across blocks and does the
211 *          needful by calling other low level routines.
212 *
213 *  @param[in,out]  ps_hist : the histogram structure
214 *
215 *  @param[in]  i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
216 *
217 *  @param[in]  i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
218 *
219 *  @return None
220 ********************************************************************************
221 */
222 
hme_init_histogram(mv_hist_t * ps_hist,S32 i4_max_mv_x,S32 i4_max_mv_y)223 void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
224 {
225     S32 i4_num_bins, i4_num_cols, i4_num_rows;
226     S32 i4_shift_x, i4_shift_y, i, i4_range, i4_val;
227 
228     /*************************************************************************/
229     /* Evaluate the shift_x and shift_y. For this, we use the following logic*/
230     /* Assuming that we use up all MAX_NUM_BINS. Then the number of bins is  */
231     /* given by formula ((max_mv_x * 2) >> shift_x)*((max_mv_y * 2)>>shift_y)*/
232     /* or shift_x + shift_y is log ((max_mv_x * max_mv_y * 4) / MAX_NUM_BINS)*/
233     /* if above quantity is negative, then we make it zero.                  */
234     /* If result is odd, then shift_y is result >> 1, shift_x is shift_y + 1 */
235     /*************************************************************************/
236     i4_val = i4_max_mv_x * i4_max_mv_y * 4;
237     i4_range = (hme_get_range(i4_val - 1)) + 1;
238     if(i4_range > LOG_MAX_NUM_BINS)
239     {
240         i4_shift_y = (i4_range - LOG_MAX_NUM_BINS);
241         i4_shift_x = (i4_shift_y + 1) >> 1;
242         i4_shift_y >>= 1;
243     }
244     else
245     {
246         i4_shift_y = 0;
247         i4_shift_x = 0;
248     }
249 
250     /* we assume the mv range is -max_mv_x to +max_mv_x, ditto for y */
251     /* So number of columns is 2*max_mv_x >> i4_shift_x. Ditto for rows */
252     /* this helps us compute num bins that are active for this histo session */
253     i4_num_cols = (i4_max_mv_x << 1) >> i4_shift_x;
254     i4_num_rows = (i4_max_mv_y << 1) >> i4_shift_y;
255     i4_num_bins = i4_num_rows * i4_num_cols;
256 
257     ASSERT(i4_num_bins <= MAX_NUM_BINS);
258 
259     ps_hist->i4_num_rows = i4_num_rows;
260     ps_hist->i4_num_cols = i4_num_cols;
261     ps_hist->i4_min_x = -i4_max_mv_x;
262     ps_hist->i4_min_y = -i4_max_mv_y;
263     ps_hist->i4_shift_x = i4_shift_x;
264     ps_hist->i4_shift_y = i4_shift_y;
265     ps_hist->i4_lobe1_size = 5;
266     ps_hist->i4_lobe2_size = 3;
267 
268     ps_hist->i4_num_bins = i4_num_bins;
269 
270     for(i = 0; i < i4_num_bins; i++)
271     {
272         ps_hist->ai4_bin_count[i] = 0;
273     }
274 }
275 
276 /**
277 ********************************************************************************
278 *  @fn     hme_update_histogram(
279 *
280 *  @brief  Updates the histogram given an mv entry
281 *
282 *  @param[in,out]  ps_hist : the histogram structure
283 *
284 *  @param[in]  i4_mv_x : x component of the mv (fpel units)
285 *
286 *  @param[in]  i4_mv_y : y component of the mv (fpel units)
287 *
288 *  @return None
289 ********************************************************************************
290 */
hme_update_histogram(mv_hist_t * ps_hist,S32 i4_mv_x,S32 i4_mv_y)291 void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
292 {
293     S32 i4_bin_index, i4_col, i4_row;
294 
295     i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
296     i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
297 
298     i4_bin_index = i4_col + (i4_row * ps_hist->i4_num_cols);
299     /* Sanity Check */
300     ASSERT(i4_bin_index < MAX_NUM_BINS);
301 
302     ps_hist->ai4_bin_count[i4_bin_index]++;
303 }
304 
305 /**
306 ********************************************************************************
307 *  @fn     hme_get_global_mv(
308 *
309 *  @brief  returns the global mv of a previous picture. Accounts for the fact
310 *          that the delta poc of the previous picture may have been different
311 *          from delta poc of current picture. Delta poc is POC difference
312 *          between a picture and its reference.
313 *
314 *  @param[out]  ps_mv: mv_t structure where the motion vector is returned
315 *
316 *  @param[in]  i4_delta_poc: the delta poc for the current pic w.r.t. reference
317 *
318 *  @return None
319 ********************************************************************************
320 */
hme_get_global_mv(layer_ctxt_t * ps_prev_layer,hme_mv_t * ps_mv,S32 i4_delta_poc)321 void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
322 {
323     S16 i2_mv_x, i2_mv_y;
324     S32 i4_delta_poc_prev;
325     S32 i4_poc_prev = ps_prev_layer->i4_poc;
326     S32 i4_poc_prev_ref = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
327 
328     i4_delta_poc_prev = i4_poc_prev - i4_poc_prev_ref;
329     i2_mv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
330     i2_mv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
331 
332     i2_mv_x = (S16)((i2_mv_x * i4_delta_poc) / i4_delta_poc_prev);
333     i2_mv_y = (S16)((i2_mv_y * i4_delta_poc) / i4_delta_poc_prev);
334 
335     ps_mv->i2_mv_x = i2_mv_x;
336     ps_mv->i2_mv_y = i2_mv_y;
337 }
338 
339 /**
340 ********************************************************************************
341 *  @fn     hme_calculate_global_mv(
342 *
343 *  @brief  Calculates global mv for a given histogram
344 *
345 *  @param[in]  ps_hist : the histogram structure
346 *
347 *  @param[in]  ps_mv : used to return the global mv
348 *
349 *  @param[in]  e_lobe_type : refer to GMV_MVTYPE_T
350 *
351 *  @return None
352 ********************************************************************************
353 */
hme_calculate_global_mv(mv_hist_t * ps_hist,hme_mv_t * ps_mv,GMV_MVTYPE_T e_lobe_type)354 void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
355 {
356     S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
357     S32 i4_max_sum = -1;
358     S32 i4_max_x = 0, i4_max_y = 0;
359 
360     if(e_lobe_type == GMV_THICK_LOBE)
361         i4_lobe_size = ps_hist->i4_lobe1_size;
362     else
363         i4_lobe_size = ps_hist->i4_lobe2_size;
364 
365     i4_offset = i4_lobe_size >> 1;
366     for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
367     {
368         for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
369         {
370             S32 i4_bin_id, i4_sum;
371             i4_bin_id = (i4_x - 2) + ((i4_y - 2) * ps_hist->i4_num_cols);
372 
373             pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
374             i4_sum = hme_compute_2d_sum_unsigned(
375                 (void *)pi4_bin_count,
376                 i4_lobe_size,
377                 i4_lobe_size,
378                 ps_hist->i4_num_cols,
379                 sizeof(U32));
380 
381             if(i4_sum > i4_max_sum)
382             {
383                 i4_max_x = i4_x;
384                 i4_max_y = i4_y;
385                 i4_max_sum = i4_sum;
386             }
387         }
388     }
389 
390     ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
391     ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
392 }
393 
394 /**
395 ********************************************************************************
396 *  @fn    ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
397 *
398 *  @brief  returns a new ctb node usable for creating a new ctb candidate
399 *
400 *  @param[in] ps_mem_mgr : memory manager holding all ctb nodes
401 *
402 *  @return NULL if no free nodes, else ptr to the new ctb node
403 ********************************************************************************
404 */
hme_get_ctb_node(ctb_mem_mgr_t * ps_mem_mgr)405 ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
406 {
407     U08 *pu1_ret;
408     if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) > ps_mem_mgr->i4_tot)
409         return (NULL);
410     pu1_ret = ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used;
411     ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
412     return ((ctb_node_t *)pu1_ret);
413 }
414 
415 /**
416 ********************************************************************************
417 *  @fn     hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
418 search_results_t *ps_search_results, S32 i4_num_ref)
419 *
420 *  @brief  For a given CU whose results are in ps_search_results, the 17x17
421 *          mv grid is updated for future use within the CTB
422 *
423 *  @param[in] ps_search_results : Search results data structure
424 *
425 *  @param[out] pps_mv_grid: The mv grid (as many as num ref)
426 *
427 *  @param[in]  i4_num_ref: nuber of search iterations to update
428 *
429 *  @return None
430 ********************************************************************************
431 */
hme_map_mvs_to_grid(mv_grid_t ** pps_mv_grid,search_results_t * ps_search_results,U08 * pu1_pred_dir_searched,S32 i4_num_pred_dir)432 void hme_map_mvs_to_grid(
433     mv_grid_t **pps_mv_grid,
434     search_results_t *ps_search_results,
435     U08 *pu1_pred_dir_searched,
436     S32 i4_num_pred_dir)
437 {
438     S32 i4_cu_start_offset;
439     /*************************************************************************/
440     /* Start x, y offset of CU relative to CTB. To update the mv grid which  */
441     /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset        */
442     /*************************************************************************/
443     S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
444     S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
445 
446     /* Controls the attribute of a given partition within CU   */
447     /* , i.e. start locn, size                                 */
448     part_attr_t *ps_part_attr;
449 
450     S32 i4_part, i4_part_id, num_parts, i4_stride;
451     S16 i2_mv_x, i2_mv_y;
452     S08 i1_ref_idx;
453 
454     /* Per partition, attributes w.r.t. CU start */
455     S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
456     PART_TYPE_T e_part_type;
457 
458     /* Points to exact mv structures within the grid to be udpated */
459     search_node_t *ps_grid_node, *ps_grid_node_tmp;
460 
461     /* points to exact mv grid (based on search iteration) to be updated */
462     mv_grid_t *ps_mv_grid;
463 
464     search_node_t *ps_search_node;
465 
466     S32 shift, i, mv_shift = 2;
467     /* Proportional to the size of CU, controls the number of 4x4 blks */
468     /* to be updated                                                   */
469     shift = ps_search_results->e_cu_size;
470     ASSERT(i4_num_pred_dir <= 2);
471 
472     e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
473 
474     if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
475        (ps_search_results->i4_part_mask & ENABLE_NxN))
476     {
477         e_part_type = PRT_NxN;
478     }
479 
480     for(i = 0; i < i4_num_pred_dir; i++)
481     {
482         num_parts = gau1_num_parts_in_part_type[e_part_type];
483         ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
484         i4_stride = ps_mv_grid->i4_stride;
485 
486         i4_cu_start_offset =
487             i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
488 
489         /* Move to the appropriate 2d locn of CU start within Grid */
490         ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
491 
492         for(i4_part = 0; i4_part < num_parts; i4_part++)
493         {
494             i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
495 
496             /* Pick the mvx and y and ref id corresponding to this partition */
497             ps_search_node =
498                 ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
499 
500             i2_mv_x = ps_search_node->s_mv.i2_mvx;
501             i2_mv_y = ps_search_node->s_mv.i2_mvy;
502             i1_ref_idx = ps_search_node->i1_ref_idx;
503 
504             /* Move to the appropriate location within the CU */
505             ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
506             x_start = ps_part_attr->u1_x_start;
507             x_end = x_start + ps_part_attr->u1_x_count;
508             y_start = ps_part_attr->u1_y_start;
509             y_end = y_start + ps_part_attr->u1_y_count;
510 
511             /* Convert attributes from 8x8 CU size to given CU size */
512             x_start = (x_start << shift) >> mv_shift;
513             x_end = (x_end << shift) >> mv_shift;
514             y_start = (y_start << shift) >> mv_shift;
515             y_end = (y_end << shift) >> mv_shift;
516 
517             ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
518 
519             /* Update all 4x4 blk mvs with the part mv */
520             /* For e.g. we update 4 units in case of NxN for 16x16 CU */
521             for(i4_y = y_start; i4_y < y_end; i4_y++)
522             {
523                 for(i4_x = x_start; i4_x < x_end; i4_x++)
524                 {
525                     ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
526                     ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
527                     ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
528                     ps_grid_node_tmp[i4_x].u1_subpel_done = 1;
529                 }
530                 ps_grid_node_tmp += i4_stride;
531             }
532         }
533     }
534 }
535 
/**
********************************************************************************
*  @fn     hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0,
*                                U08 *pu1_pred1, S32 i4_stride)
*
*  @brief  Stores the two pred buffer pointers and the pred stride in a ctb
*          node and, when the node has children, recursively does the same for
*          its four quadrants with the pointers offset to each quadrant
*
*  @param[in,out] ps_parent : node to update; a NULL ps_tl marks a leaf
*
*  @param[in] pu1_pred0 : pred buffer 0 for this node
*
*  @param[in] pu1_pred1 : pred buffer 1 for this node
*
*  @param[in] i4_stride : pred stride for this node; children are given half
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
{
    ps_parent->apu1_pred[0] = pu1_pred0;
    ps_parent->apu1_pred[1] = pu1_pred1;
    ps_parent->i4_pred_stride = i4_stride;

    if(ps_parent->ps_tl != NULL)
    {
        /* Quadrant size: x distance from this node to its top right child */
        S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off;
        blk_wd -= (S32)ps_parent->u1_x_off;

        /* Top left: same origin as the parent */
        hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_stride >> 1);

        /* Top right: blk_wd pixels to the right */
        hme_set_ctb_pred_attr(
            ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_stride >> 1);

        /* Bottom left: blk_wd rows down */
        hme_set_ctb_pred_attr(
            ps_parent->ps_bl,
            pu1_pred0 + (blk_wd * i4_stride),
            pu1_pred1 + (blk_wd * i4_stride),
            i4_stride >> 1);

        /* Bottom right: blk_wd rows down and blk_wd pixels to the right.   */
        /* This offset belongs to the bottom right child; applying it to    */
        /* ps_tr would overwrite the top right attributes set above and     */
        /* leave the bottom right child unset.                              */
        /* NOTE(review): assumes ctb_node_t has a ps_br child pointer next  */
        /* to ps_tl / ps_tr / ps_bl - confirm against the struct definition */
        hme_set_ctb_pred_attr(
            ps_parent->ps_br,
            pu1_pred0 + (blk_wd * (1 + i4_stride)),
            pu1_pred1 + (blk_wd * (1 + i4_stride)),
            i4_stride >> 1);
    }
}
564 
565 /**
566 ********************************************************************************
567 *  @fn     hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
568 *
569 *  @brief  Expands the part mask to a list of valid part ids terminated by -1
570 *
571 *  @param[in] i4_part_mask : bit mask of active partitino ids
572 *
573 *  @param[out] pi4_valid_part_ids : array, each entry has one valid part id
574 *               Terminated by -1 to signal end.
575 *
576 *  @return number of partitions
577 ********************************************************************************
578 */
hme_create_valid_part_ids(S32 i4_part_mask,S32 * pi4_valid_part_ids)579 S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
580 {
581     S32 id = 0, i;
582     for(i = 0; i < TOT_NUM_PARTS; i++)
583     {
584         if(i4_part_mask & (1 << i))
585         {
586             pi4_valid_part_ids[id] = i;
587             id++;
588         }
589     }
590     pi4_valid_part_ids[id] = -1;
591 
592     return id;
593 }
594 
595 ctb_boundary_attrs_t *
get_ctb_attrs(S32 ctb_start_x,S32 ctb_start_y,S32 pic_wd,S32 pic_ht,me_frm_ctxt_t * ps_ctxt)596     get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
597 {
598     S32 horz_crop, vert_crop;
599     ctb_boundary_attrs_t *ps_attrs;
600 
601     horz_crop = ((ctb_start_x + 64) > pic_wd) ? 2 : 0;
602     vert_crop = ((ctb_start_y + 64) > pic_ht) ? 1 : 0;
603     switch(horz_crop + vert_crop)
604     {
605     case 0:
606         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE];
607         break;
608     case 1:
609         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY];
610         break;
611     case 2:
612         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY];
613         break;
614     case 3:
615         ps_attrs = &ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY];
616         break;
617     }
618     return (ps_attrs);
619 }
620 
621 /**
622 ********************************************************************************
623 *  @fn     hevc_avg_2d(U08 *pu1_src1,
624 *                   U08 *pu1_src2,
625 *                   S32 i4_src1_stride,
626 *                   S32 i4_src2_stride,
627 *                   S32 i4_blk_wd,
628 *                   S32 i4_blk_ht,
629 *                   U08 *pu1_dst,
630 *                   S32 i4_dst_stride)
631 *
632 *
633 *  @brief  point wise average of two buffers into a third buffer
634 *
635 *  @param[in] pu1_src1 : first source buffer
636 *
637 *  @param[in] pu1_src2 : 2nd source buffer
638 *
639 *  @param[in] i4_src1_stride : stride of source 1 buffer
640 *
641 *  @param[in] i4_src2_stride : stride of source 2 buffer
642 *
643 *  @param[in] i4_blk_wd : block width
644 *
645 *  @param[in] i4_blk_ht : block height
646 *
647 *  @param[out] pu1_dst : destination buffer
648 *
649 *  @param[in] i4_dst_stride : stride of the destination buffer
650 *
651 *  @return void
652 ********************************************************************************
653 */
hevc_avg_2d(U08 * pu1_src1,U08 * pu1_src2,S32 i4_src1_stride,S32 i4_src2_stride,S32 i4_blk_wd,S32 i4_blk_ht,U08 * pu1_dst,S32 i4_dst_stride)654 void hevc_avg_2d(
655     U08 *pu1_src1,
656     U08 *pu1_src2,
657     S32 i4_src1_stride,
658     S32 i4_src2_stride,
659     S32 i4_blk_wd,
660     S32 i4_blk_ht,
661     U08 *pu1_dst,
662     S32 i4_dst_stride)
663 {
664     S32 i, j;
665 
666     for(i = 0; i < i4_blk_ht; i++)
667     {
668         for(j = 0; j < i4_blk_wd; j++)
669         {
670             pu1_dst[j] = (pu1_src1[j] + pu1_src2[j] + 1) >> 1;
671         }
672         pu1_src1 += i4_src1_stride;
673         pu1_src2 += i4_src2_stride;
674         pu1_dst += i4_dst_stride;
675     }
676 }
677 /**
678 ********************************************************************************
679 *  @fn     hme_pick_back_search_node(search_results_t *ps_search_results,
680 *                                   search_node_t *ps_search_node_fwd,
681 *                                   S32 i4_part_idx,
682 *                                   layer_ctxt_t *ps_curr_layer)
683 *
684 *
685 *  @brief  returns the search node corresponding to a ref idx in same or
686 *          opp direction. Preference is given to opp direction, but if that
687 *          does not yield results, same direction is attempted.
688 *
689 *  @param[in] ps_search_results: search results overall
690 *
691 *  @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
692 *
693 *  @param[in] i4_part_idx : partition id
694 *
695 *  @param[in] ps_curr_layer : layer context for current layer.
696 *
697 *  @return search node corresponding to hte "other direction"
698 ********************************************************************************
699 */
700 //#define PICK_L1_REF_SAME_DIR
hme_pick_back_search_node(search_results_t * ps_search_results,search_node_t * ps_search_node_fwd,S32 i4_part_idx,layer_ctxt_t * ps_curr_layer)701 search_node_t *hme_pick_back_search_node(
702     search_results_t *ps_search_results,
703     search_node_t *ps_search_node_fwd,
704     S32 i4_part_idx,
705     layer_ctxt_t *ps_curr_layer)
706 {
707     S32 is_past_l0, is_past_l1, id, i, i4_poc;
708     S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
709     //ref_attr_t *ps_ref_attr_lc;
710     S08 i1_ref_idx_fwd;
711     S16 i2_mv_x, i2_mv_y;
712     search_node_t *ps_search_node;
713 
714     i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
715     i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
716     i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
717     i4_poc = ps_curr_layer->i4_poc;
718 
719     //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
720     /* If the ref id already picked up maps to a past pic, then we pick */
721     /* a result corresponding to future pic. If such a result is not    */
722     /* to be found, then we pick a result corresponding to a past pic   */
723     //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
724     is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0;
725 
726     ASSERT(ps_search_results->u1_num_active_ref <= 2);
727 
728     /* pick the right iteration of search nodes to pick up */
729 #ifdef PICK_L1_REF_SAME_DIR
730     if(ps_search_results->u1_num_active_ref == 2)
731         id = !is_past_l0;
732 #else
733     if(ps_search_results->u1_num_active_ref == 2)
734         id = is_past_l0;
735 #endif
736     else
737         id = 0;
738 
739     ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
740 
741     for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
742     {
743         S08 i1_ref_test = ps_search_node[i].i1_ref_idx;
744         is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
745         //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
746 #ifdef PICK_L1_REF_SAME_DIR
747         if(is_past_l1 == is_past_l0)
748 #else
749         if(is_past_l1 != is_past_l0)
750 #endif
751         {
752             /* belongs to same direction as the ref idx passed, so continue */
753             return (ps_search_node + i);
754         }
755     }
756 
757     /* Unable to find best result in opp direction, so try same direction */
758     /* However we need to ensure that we do not pick up same result       */
759     for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
760     {
761         if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
762            (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
763         {
764             return (ps_search_node);
765         }
766         ps_search_node++;
767     }
768 
769     //ASSERT(0);
770     return (ps_search_results->aps_part_results[id][i4_part_idx]);
771 
772     //return (NULL);
773 }
774 
/**
********************************************************************************
*  @fn     hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride,
*                                       S32 limit_active_partitions)
*
*
*  @brief  Examines an input 16x16 block for possible edges and their
*          orientations, and returns a bit mask of the partitions that should
*          be searched
*
*  @param[in] pu1_inp : input buffer
*
*  @param[in] i4_inp_stride: input stride
*
*  @param[in] limit_active_partitions : when 0, the input is not analysed and
*              all partitions are enabled (ENABLE_ALL_PARTS is returned)
*
*  @return part mask (bit mask of active partitions to search)
********************************************************************************
*/
790 
hme_study_input_segmentation(U08 * pu1_inp,S32 i4_inp_stride,S32 limit_active_partitions)791 S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
792 {
793     S32 i4_rsum[16], i4_csum[16];
794     U08 *pu1_tmp, u1_tmp;
795     S32 i4_max_ridx, i4_max_cidx, i4_tmp;
796     S32 i, j, i4_ret;
797     S32 i4_max_rp[4], i4_max_cp[4];
798     S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
799     S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
800 #define EDGE_THR (15 * 16)
801 #define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
802 
803     if(0 == limit_active_partitions)
804     {
805         /*********************************************************************/
806         /* In this case, we do not optimize on active partitions and search  */
807         /* brute force. This way, 17 partitinos would be enabled.            */
808         /*********************************************************************/
809         return (ENABLE_ALL_PARTS);
810     }
811 
812     /*************************************************************************/
813     /* Control passes below in case we wish to optimize on active partitions.*/
814     /* This is based on input characteristics, check how an edge passes along*/
815     /* an input 16x16 area, if at all, and decide active partitinos.         */
816     /*************************************************************************/
817 
818     /* Initialize row and col sums */
819     for(i = 0; i < 16; i++)
820     {
821         i4_rsum[i] = 0;
822         i4_csum[i] = 0;
823     }
824     pu1_tmp = pu1_inp;
825     for(i = 0; i < 16; i++)
826     {
827         for(j = 0; j < 16; j++)
828         {
829             u1_tmp = *pu1_tmp++;
830             i4_rsum[i] += u1_tmp;
831             i4_csum[j] += u1_tmp;
832         }
833         pu1_tmp += (i4_inp_stride - 16);
834     }
835 
836     /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
837     i4_max_rp[0] = 0;
838     i4_max_cp[0] = 0;
839     i4_max_rp[1] = 0;
840     i4_max_cp[1] = 0;
841     i4_max_rp[2] = 0;
842     i4_max_cp[2] = 0;
843     i4_max_rp[3] = 0;
844     i4_max_cp[3] = 0;
845 
846     /* Get Max edge strength across (2,3) (3,4) (4,5) */
847     for(i = 3; i < 6; i++)
848     {
849         /* Run [-1 -2 2 1] filter through rsum/csum */
850         i4_tmp = HI_PASS(i4_rsum, i);
851         if(ABS(i4_tmp) > i4_max_rp[1])
852             i4_max_rp[1] = i4_tmp;
853 
854         i4_tmp = HI_PASS(i4_csum, i);
855         if(ABS(i4_tmp) > i4_max_cp[1])
856             i4_max_cp[1] = i4_tmp;
857     }
858 
859     /* Get Max edge strength across (6,7) (7,8) (8,9) */
860     for(i = 7; i < 10; i++)
861     {
862         /* Run [-1 -2 2 1] filter through rsum/csum */
863         i4_tmp = HI_PASS(i4_rsum, i);
864         if(ABS(i4_tmp) > i4_max_rp[2])
865             i4_max_rp[2] = i4_tmp;
866 
867         i4_tmp = HI_PASS(i4_csum, i);
868         if(ABS(i4_tmp) > i4_max_cp[2])
869             i4_max_cp[2] = i4_tmp;
870     }
871 
872     /* Get Max edge strength across (10,11) (11,12) (12,13) */
873     for(i = 11; i < 14; i++)
874     {
875         /* Run [-1 -2 2 1] filter through rsum/csum */
876         i4_tmp = HI_PASS(i4_rsum, i);
877         if(ABS(i4_tmp) > i4_max_rp[3])
878             i4_max_rp[3] = i4_tmp;
879 
880         i4_tmp = HI_PASS(i4_csum, i);
881         if(ABS(i4_tmp) > i4_max_cp[3])
882             i4_max_cp[3] = i4_tmp;
883     }
884 
885     /* Find the maximum across the 3 and see whether the strength qualifies as edge */
886     i4_max_ridx = 1;
887     i4_max_cidx = 1;
888     for(i = 2; i <= 3; i++)
889     {
890         if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
891             i4_max_ridx = i;
892 
893         if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
894             i4_max_cidx = i;
895     }
896 
897     if(EDGE_THR > i4_max_rp[i4_max_ridx])
898     {
899         i4_max_ridx = 0;
900     }
901 
902     if(EDGE_THR > i4_max_cp[i4_max_cidx])
903     {
904         i4_max_cidx = 0;
905     }
906 
907     i4_ret = ENABLE_2Nx2N;
908 
909     /* If only vertical discontinuity, go with one of 2Nx? */
910     if(0 == (i4_max_ridx + i4_max_cidx))
911     {
912         //num_me_parts++;
913         return i4_ret;
914     }
915 
916     if(i4_max_ridx && (i4_max_cidx == 0))
917     {
918         //num_me_parts += 3;
919         return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
920     }
921 
922     /* If only horizontal discontinuity, go with one of ?x2N */
923     if(i4_max_cidx && (i4_max_ridx == 0))
924     {
925         //num_me_parts += 3;
926         return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
927     }
928 
929     /* If middle is dominant in both directions, go with NxN */
930     if((2 == i4_max_cidx) && (2 == i4_max_ridx))
931     {
932         //num_me_parts += 5;
933         return ((i4_ret | ENABLE_NxN));
934     }
935 
936     /* Otherwise, conservatively, enable NxN and the 2 AMPs */
937     //num_me_parts += 9;
938     return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
939 }
940 
941 /**
942 ********************************************************************************
943 *  @fn     hme_init_search_results(search_results_t *ps_search_results,
944 *                           S32 i4_num_ref,
945 *                           S32 i4_num_best_results,
946 *                           S32 i4_num_results_per_part,
947 *                           BLK_SIZE_T e_blk_size,
948 *                           S32 i4_x_off,
949 *                           S32 i4_y_off)
950 *
951 *  @brief  Initializes the search results structure with some key attributes
952 *
953 *  @param[out] ps_search_results : search results structure to initialise
954 *
955 *  @param[in] i4_num_Ref: corresponds to the number of ref ids searched
956 *
957 *  @param[in] i4_num_best_results: Number of best results for the CU to
958 *               be maintained in the result structure
959 *
960 *  @param[in] i4_num_results_per_part: Per active partition the number of best
961 *               results to be maintained
962 *
963 *  @param[in] e_blk_size: blk size of the CU for which this structure used
964 *
965 *  @param[in] i4_x_off: x offset of the top left of CU from CTB top left
966 *
967 *  @param[in] i4_y_off: y offset of the top left of CU from CTB top left
968 *
969 *  @param[in] pu1_is_past : points ot an array that tells whether a given ref id
970 *              has prominence in L0 or in L1 list (past or future )
971 *
972 *  @return void
973 ********************************************************************************
974 */
hme_init_search_results(search_results_t * ps_search_results,S32 i4_num_ref,S32 i4_num_best_results,S32 i4_num_results_per_part,BLK_SIZE_T e_blk_size,S32 i4_x_off,S32 i4_y_off,U08 * pu1_is_past)975 void hme_init_search_results(
976     search_results_t *ps_search_results,
977     S32 i4_num_ref,
978     S32 i4_num_best_results,
979     S32 i4_num_results_per_part,
980     BLK_SIZE_T e_blk_size,
981     S32 i4_x_off,
982     S32 i4_y_off,
983     U08 *pu1_is_past)
984 {
985     CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
986 
987     ASSERT(e_cu_size != -1);
988     ps_search_results->e_cu_size = e_cu_size;
989     ps_search_results->u1_x_off = (U08)i4_x_off;
990     ps_search_results->u1_y_off = (U08)i4_y_off;
991     ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
992     ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
993     ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
994     ps_search_results->pu1_is_past = pu1_is_past;
995     ps_search_results->u1_split_flag = 0;
996     ps_search_results->best_cu_cost = MAX_32BIT_VAL;
997 }
998 
999 /**
1000 ********************************************************************************
1001 *  @fn     hme_reset_search_results((search_results_t *ps_search_results,
1002 *                               S32 i4_part_mask)
1003 *
1004 *
1005 *  @brief  Resets the best results to maximum values, so as to allow search
1006 *          for the new CU's partitions. The existing results may be from an
1007 *          older CU using same structure.
1008 *
1009 *  @param[in] ps_search_results: search results structure
1010 *
1011 *  @param[in] i4_part_mask : bit mask of active partitions
1012 *
1013 *  @return part mask (bit mask of active partitions to search)
1014 ********************************************************************************
1015 */
hme_reset_search_results(search_results_t * ps_search_results,S32 i4_part_mask,S32 mv_res)1016 void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
1017 {
1018     S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
1019     S08 i1_ref_idx;
1020     S32 i, j;
1021     search_node_t *ps_search_node;
1022 
1023     /* store this for future use */
1024     ps_search_results->i4_part_mask = i4_part_mask;
1025 
1026     /* Reset the spli_flag to zero */
1027     ps_search_results->u1_split_flag = 0;
1028 
1029     HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
1030     HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
1031 
1032     for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
1033     {
1034         /* Reset the individual partitino results */
1035         for(i = 0; i < TOT_NUM_PARTS; i++)
1036         {
1037             if(!(i4_part_mask & (1 << i)))
1038                 continue;
1039 
1040             ps_search_node = ps_search_results->aps_part_results[i1_ref_idx][i];
1041 
1042             for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
1043             {
1044                 ps_search_node[j].s_mv.i2_mvx = 0;
1045                 ps_search_node[j].s_mv.i2_mvy = 0;
1046                 ps_search_node[j].i4_tot_cost = MAX_32BIT_VAL;
1047                 ps_search_node[j].i4_sad = MAX_32BIT_VAL;
1048                 ps_search_node[j].i4_sdi = 0;
1049                 ps_search_node[j].i1_ref_idx = -1;
1050                 ps_search_node[j].u1_subpel_done = 0;
1051                 ps_search_node[j].u1_is_avail = 1;
1052                 ps_search_node[j].i4_mv_cost = 0;
1053             }
1054         }
1055     }
1056 }
1057 /**
1058 ********************************************************************************
1059 *  @fn     hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
1060 *                               S32 i4_step,
1061 *                               range_prms_t *ps_mvrange)
1062 *
1063 *  @brief  Given a central pt within mv range, and a grid of points surrounding
1064 *           this pt, this function returns a grid mask of pts within search rng
1065 *
1066 *  @param[in] ps_search_node: the centre pt of the grid
1067 *
1068 *  @param[in] i4_step: step size of grid
1069 *
1070 *  @param[in] ps_mvrange: structure containing the current mv range
1071 *
1072 *  @return bitmask of the  pts in grid within search range
1073 ********************************************************************************
1074 */
hme_clamp_grid_by_mvrange(search_node_t * ps_search_node,S32 i4_step,range_prms_t * ps_mvrange)1075 S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
1076 {
1077     S32 i4_mask = GRID_ALL_PTS_VALID;
1078     if(ps_search_node->s_mv.i2_mvx + i4_step >= ps_mvrange->i2_max_x)
1079     {
1080         i4_mask &= (GRID_RT_3_INVALID);
1081     }
1082     if(ps_search_node->s_mv.i2_mvx - i4_step < ps_mvrange->i2_min_x)
1083     {
1084         i4_mask &= (GRID_LT_3_INVALID);
1085     }
1086     if(ps_search_node->s_mv.i2_mvy + i4_step >= ps_mvrange->i2_max_y)
1087     {
1088         i4_mask &= (GRID_BOT_3_INVALID);
1089     }
1090     if(ps_search_node->s_mv.i2_mvy - i4_step < ps_mvrange->i2_min_y)
1091     {
1092         i4_mask &= (GRID_TOP_3_INVALID);
1093     }
1094     return i4_mask;
1095 }
1096 
1097 /**
1098 ********************************************************************************
1099 *  @fn    layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1100 S32 i4_layer_id)
1101 *
1102 *  @brief  returns the layer ctxt of the layer with given id from the temporally
1103 *          previous frame
1104 *
1105 *  @param[in] ps_ctxt : ME context
1106 *
1107 *  @param[in] i4_layer_id : id of layer required
1108 *
1109 *  @return layer ctxt of given layer id in temporally previous frame
1110 ********************************************************************************
1111 */
hme_get_past_layer_ctxt(me_ctxt_t * ps_ctxt,me_frm_ctxt_t * ps_frm_ctxt,S32 i4_layer_id,S32 i4_num_me_frm_pllel)1112 layer_ctxt_t *hme_get_past_layer_ctxt(
1113     me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
1114 {
1115     S32 i4_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
1116     S32 i;
1117     layers_descr_t *ps_desc;
1118 
1119     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1120     {
1121         ps_desc = &ps_ctxt->as_ref_descr[i];
1122         if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1123             return (ps_desc->aps_layers[i4_layer_id]);
1124     }
1125     return NULL;
1126 }
1127 
1128 /**
1129 ********************************************************************************
1130 *  @fn    layer_ctxt_t *hme_coarse_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
1131 S32 i4_layer_id)
1132 *
1133 *  @brief  returns the layer ctxt of the layer with given id from the temporally
1134 *          previous frame
1135 *
1136 *  @param[in] ps_ctxt : ME context
1137 *
1138 *  @param[in] i4_layer_id : id of layer required
1139 *
1140 *  @return layer ctxt of given layer id in temporally previous frame
1141 ********************************************************************************
1142 */
hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t * ps_ctxt,S32 i4_layer_id)1143 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
1144 {
1145     S32 i4_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
1146     S32 i;
1147     layers_descr_t *ps_desc;
1148 
1149     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1150     {
1151         ps_desc = &ps_ctxt->as_ref_descr[i];
1152         if(i4_poc == ps_desc->aps_layers[i4_layer_id]->i4_poc)
1153             return (ps_desc->aps_layers[i4_layer_id]);
1154     }
1155     return NULL;
1156 }
1157 
1158 /**
1159 ********************************************************************************
1160 *  @fn    void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
1161 BLK_SIZE_T e_blk_size,
1162 S32 i4_num_ref,
1163 S32 i4_num_results_per_part)
1164 *
1165 *  @brief  Given a blk size to be used for this layer, this function initialize
1166 *          the mv bank to make it ready to store and return results.
1167 *
1168 *  @param[in, out] ps_layer_ctxt: pointer to layer ctxt
1169 *
1170 *  @param[in] e_blk_size : resolution at which mvs are stored
1171 *
1172 *  @param[in] i4_num_ref: number of reference frames corresponding to which
1173 *              results are stored.
1174 *
1175 *  @param[in] e_blk_size : resolution at which mvs are stored
1176 *
1177 *  @param[in] i4_num_results_per_part : Number of results to be stored per
1178 *               ref idx. So these many best results stored
1179 *
1180 *  @return void
1181 ********************************************************************************
1182 */
hme_init_mv_bank(layer_ctxt_t * ps_layer_ctxt,BLK_SIZE_T e_blk_size,S32 i4_num_ref,S32 i4_num_results_per_part,U08 u1_enc)1183 void hme_init_mv_bank(
1184     layer_ctxt_t *ps_layer_ctxt,
1185     BLK_SIZE_T e_blk_size,
1186     S32 i4_num_ref,
1187     S32 i4_num_results_per_part,
1188     U08 u1_enc)
1189 {
1190     layer_mv_t *ps_mv_bank;
1191     hme_mv_t *ps_mv1, *ps_mv2;
1192     S08 *pi1_ref_id1, *pi1_ref_id2;
1193     S32 blk_wd, mvs_in_blk, blks_in_row, mvs_in_row, blks_in_col;
1194     S32 i4_i, i4_j, blk_ht;
1195 
1196     ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
1197     ps_mv_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
1198     ps_mv_bank->i4_num_ref = i4_num_ref;
1199     mvs_in_blk = i4_num_ref * i4_num_results_per_part;
1200     ps_mv_bank->i4_num_mvs_per_blk = mvs_in_blk;
1201 
1202     /*************************************************************************/
1203     /* Store blk size, from blk size derive blk width and use this to compute*/
1204     /* number of blocks every row. We also pad to left and top by 1, to      */
1205     /* support the prediction mechanism.                                     */
1206     /*************************************************************************/
1207     ps_mv_bank->e_blk_size = e_blk_size;
1208     blk_wd = gau1_blk_size_to_wd[e_blk_size];
1209     blk_ht = gau1_blk_size_to_ht[e_blk_size];
1210 
1211     blks_in_row = (ps_layer_ctxt->i4_wd + (blk_wd - 1)) / blk_wd;
1212     blks_in_col = (ps_layer_ctxt->i4_ht + (blk_ht - 1)) / blk_ht;
1213 
1214     if(u1_enc)
1215     {
1216         /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
1217         WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
1218         WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
1219 
1220         blks_in_row = (num_ctb_cols << 3);
1221         blks_in_col = (num_ctb_rows << 3);
1222     }
1223 
1224     blks_in_row += 2;
1225     mvs_in_row = blks_in_row * mvs_in_blk;
1226 
1227     ps_mv_bank->i4_num_blks_per_row = blks_in_row;
1228     ps_mv_bank->i4_num_mvs_per_row = mvs_in_row;
1229 
1230     /* To ensure run time requirements fall within allocation time request */
1231     ASSERT(ps_mv_bank->i4_num_mvs_per_row <= ps_mv_bank->max_num_mvs_per_row);
1232 
1233     /*************************************************************************/
1234     /* Increment by one full row at top for padding and one column in left   */
1235     /* this gives us the actual start of mv for 0,0 blk                      */
1236     /*************************************************************************/
1237     ps_mv_bank->ps_mv = ps_mv_bank->ps_mv_base + mvs_in_row + mvs_in_blk;
1238     ps_mv_bank->pi1_ref_idx = ps_mv_bank->pi1_ref_idx_base + mvs_in_row + mvs_in_blk;
1239 
1240     memset(ps_mv_bank->ps_mv_base, 0, mvs_in_row * sizeof(hme_mv_t));
1241     memset(ps_mv_bank->pi1_ref_idx_base, -1, mvs_in_row * sizeof(U08));
1242 
1243     /*************************************************************************/
1244     /* Initialize top row, left col and right col with zeros since these are */
1245     /* used as candidates during searches.                                   */
1246     /*************************************************************************/
1247     ps_mv1 = ps_mv_bank->ps_mv_base + mvs_in_row;
1248     ps_mv2 = ps_mv1 + mvs_in_row - mvs_in_blk;
1249     pi1_ref_id1 = ps_mv_bank->pi1_ref_idx_base + mvs_in_row;
1250     pi1_ref_id2 = pi1_ref_id1 + mvs_in_row - mvs_in_blk;
1251     for(i4_i = 0; i4_i < blks_in_col; i4_i++)
1252     {
1253         for(i4_j = 0; i4_j < mvs_in_blk; i4_j++)
1254         {
1255             ps_mv1[i4_j].i2_mv_x = 0;
1256             ps_mv1[i4_j].i2_mv_y = 0;
1257             ps_mv2[i4_j].i2_mv_x = 0;
1258             ps_mv2[i4_j].i2_mv_y = 0;
1259             pi1_ref_id1[i4_j] = -1;
1260             pi1_ref_id2[i4_j] = -1;
1261         }
1262         ps_mv1 += mvs_in_row;
1263         ps_mv2 += mvs_in_row;
1264         pi1_ref_id1 += mvs_in_row;
1265         pi1_ref_id2 += mvs_in_row;
1266     }
1267 }
/**
********************************************************************************
*  @fn    void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
*
*  @brief  Marks every mv stored in the layer's mv bank as intra: both mv
*          components are set to INTRA_MV and the ref idx to -1. Only blks
*          lying completely within the layer dimensions are covered (blk
*          counts are rounded down); padding is left untouched.
*
*  @param[in, out] ps_layer_ctxt: pointer to layer ctxt whose mv bank is
*              filled. The bank geometry (blk size, mvs per blk, mvs per
*              row, start of blk 0,0) is read from the bank itself.
*
*  @return void
********************************************************************************
*/
void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
{
    layer_mv_t *ps_mv_bank;
    hme_mv_t *ps_mv;
    S08 *pi1_ref_id;
    S32 blk_wd, blk_ht, blks_in_row, blks_in_col, mvs_in_row;
    S32 i, j;
    BLK_SIZE_T e_blk_size;

    ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;

    /*************************************************************************/
    /* From the blk size stored in the bank derive blk width and height, and */
    /* from these the number of whole blks per row / column of the layer.    */
    /* Height comes from the height table, so that non square blk sizes are  */
    /* handled as well.                                                      */
    /*************************************************************************/
    e_blk_size = ps_mv_bank->e_blk_size;
    blk_wd = gau1_blk_size_to_wd[e_blk_size];
    blk_ht = gau1_blk_size_to_ht[e_blk_size];
    blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
    blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;

    /* Every blk holds i4_num_mvs_per_blk mvs, all of which are to be filled */
    mvs_in_row = blks_in_row * ps_mv_bank->i4_num_mvs_per_blk;

    /*************************************************************************/
    /* ps_mv / pi1_ref_idx of the bank already point to the storage of the   */
    /* blk at 0,0 (past the top padding row and the left padding blk)        */
    /*************************************************************************/
    ps_mv = ps_mv_bank->ps_mv;
    pi1_ref_id = ps_mv_bank->pi1_ref_idx;

    for(i = 0; i < blks_in_col; i++)
    {
        for(j = 0; j < mvs_in_row; j++)
        {
            ps_mv[j].i2_mv_x = INTRA_MV;
            ps_mv[j].i2_mv_y = INTRA_MV;
            pi1_ref_id[j] = -1;
        }
        /* Row stride of the bank includes the padding blks */
        ps_mv += ps_mv_bank->i4_num_mvs_per_row;
        pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
    }
}
1310 
1311 /**
1312 ********************************************************************************
1313 *  @fn    void hme_derive_search_range(range_prms_t *ps_range,
1314 *                                   range_prms_t *ps_pic_limit,
1315 *                                   range_prms_t *ps_mv_limit,
1316 *                                   S32 i4_x,
1317 *                                   S32 i4_y,
1318 *                                   S32 blk_wd,
1319 *                                   S32 blk_ht)
1320 *
1321 *  @brief  given picture limits and blk dimensions and mv search limits, obtains
1322 *          teh valid search range such that the blk stays within pic boundaries,
1323 *          where picture boundaries include padded portions of picture
1324 *
1325 *  @param[out] ps_range: updated with actual search range
1326 *
1327 *  @param[in] ps_pic_limit : picture boundaries
1328 *
1329 *  @param[in] ps_mv_limit: Search range limits for the mvs
1330 *
1331 *  @param[in] i4_x : x coordinate of the blk
1332 *
1333 *  @param[in] i4_y : y coordinate of the blk
1334 *
1335 *  @param[in] blk_wd : blk width
1336 *
1337 *  @param[in] blk_ht : blk height
1338 *
1339 *  @return void
1340 ********************************************************************************
1341 */
hme_derive_search_range(range_prms_t * ps_range,range_prms_t * ps_pic_limit,range_prms_t * ps_mv_limit,S32 i4_x,S32 i4_y,S32 blk_wd,S32 blk_ht)1342 void hme_derive_search_range(
1343     range_prms_t *ps_range,
1344     range_prms_t *ps_pic_limit,
1345     range_prms_t *ps_mv_limit,
1346     S32 i4_x,
1347     S32 i4_y,
1348     S32 blk_wd,
1349     S32 blk_ht)
1350 {
1351     ps_range->i2_max_x =
1352         MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x), ps_mv_limit->i2_max_x);
1353     ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
1354     ps_range->i2_max_y =
1355         MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y), ps_mv_limit->i2_max_y);
1356     ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
1357 }
1358 
1359 /**
1360 ********************************************************************************
1361 *  @fn    void hme_get_spatial_candt(search_node_t *ps_search_node,
1362 *                                   layer_ctxt_t *ps_curr_layer,
1363 *                                   S32 i4_blk_x,
1364 *                                   S32 i4_blk_y,
1365 *                                   S08 i1_ref_id,
1366 *                                   S32 i4_result_id)
1367 *
1368 *  @brief  obtains a candt from the same mv bank as the current one, its called
1369 *          spatial candt as it does not require scaling for temporal distances
1370 *
1371 *  @param[out] ps_search_node: mv and ref id updated here of the candt
1372 *
1373 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1374 *
1375 *  @param[in] i4_blk_x : x coordinate of the block in mv bank
1376 *
1377 *  @param[in] i4_blk_y : y coordinate of the block in mv bank
1378 *
1379 *  @param[in] i1_ref_id : Corresponds to ref idx from which to pick up mv
1380 *              results, useful if multiple ref idx candts maintained separately.
1381 *
1382 *  @param[in] i4_result_id : If multiple results stored per ref idx, this
1383 *              pts to the id of the result
1384 *
1385 *  @param[in] tr_avail : top right availability of the block
1386 *
1387 *  @param[in] bl_avail : bottom left availability of the block
1388 *
1389 *  @return void
1390 ********************************************************************************
1391 */
hme_get_spatial_candt(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 encode)1392 void hme_get_spatial_candt(
1393     layer_ctxt_t *ps_curr_layer,
1394     BLK_SIZE_T e_search_blk_size,
1395     S32 i4_blk_x,
1396     S32 i4_blk_y,
1397     S08 i1_ref_idx,
1398     search_node_t *ps_top_neighbours,
1399     search_node_t *ps_left_neighbours,
1400     S32 i4_result_id,
1401     S32 tr_avail,
1402     S32 bl_avail,
1403     S32 encode)
1404 
1405 {
1406     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1407     S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1408     S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1409     search_node_t *ps_search_node;
1410     S32 i4_offset;
1411     hme_mv_t *ps_mv, *ps_mv_base;
1412     S08 *pi1_ref_idx, *pi1_ref_idx_base;
1413     S32 jump = 1, mvs_in_blk, mvs_in_row;
1414     S32 shift = (encode ? 2 : 0);
1415 
1416     if(i4_blk_size1 != i4_blk_size2)
1417     {
1418         i4_blk_x <<= 1;
1419         i4_blk_y <<= 1;
1420         jump = 2;
1421         if((i4_blk_size1 << 2) == i4_blk_size2)
1422         {
1423             i4_blk_x <<= 1;
1424             i4_blk_y <<= 1;
1425             jump = 4;
1426         }
1427     }
1428 
1429     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1430     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1431 
1432     /* Adjust teh blk coord to point to top left locn */
1433     i4_blk_x -= 1;
1434     i4_blk_y -= 1;
1435     /* Pick up the mvs from the location */
1436     i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1437     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1438 
1439     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1440     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1441 
1442     ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1443     pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
1444 
1445     ps_mv_base = ps_mv;
1446     pi1_ref_idx_base = pi1_ref_idx;
1447 
1448     /* ps_mv and pi1_ref_idx now point to the top left locn */
1449     /* Get 4 mvs as follows:                                */
1450     ps_search_node = ps_top_neighbours;
1451     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1452 
1453     /* Move to top */
1454     ps_search_node++;
1455     ps_mv += mvs_in_blk;
1456     pi1_ref_idx += mvs_in_blk;
1457     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1458 
1459     /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1460     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1461     {
1462         ps_search_node++;
1463         ps_mv += (mvs_in_blk * (jump >> 1));
1464         pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1465         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1466     }
1467     else
1468     {
1469         ps_search_node++;
1470         ps_search_node->s_mv.i2_mvx = 0;
1471         ps_search_node->s_mv.i2_mvy = 0;
1472         ps_search_node->i1_ref_idx = i1_ref_idx;
1473         ps_search_node->u1_is_avail = 0;
1474         ps_search_node->u1_subpel_done = 0;
1475     }
1476 
1477     /* Move to tr: this will be tr w.r.t. the blk being searched */
1478     ps_search_node++;
1479     if(tr_avail == 0)
1480     {
1481         ps_search_node->s_mv.i2_mvx = 0;
1482         ps_search_node->s_mv.i2_mvy = 0;
1483         ps_search_node->i1_ref_idx = i1_ref_idx;
1484         ps_search_node->u1_is_avail = 0;
1485         ps_search_node->u1_subpel_done = 0;
1486     }
1487     else
1488     {
1489         ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1490         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1491         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1492     }
1493 
1494     /* Move to left */
1495     ps_search_node = ps_left_neighbours;
1496     ps_mv = ps_mv_base + mvs_in_row;
1497     pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1498     COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1499 
1500     /* Move to l1 */
1501     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1502     {
1503         ps_search_node++;
1504         ps_mv += (mvs_in_row * (jump >> 1));
1505         pi1_ref_idx += (mvs_in_row * (jump >> 1));
1506         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1507     }
1508     else
1509     {
1510         ps_search_node++;
1511         ps_search_node->s_mv.i2_mvx = 0;
1512         ps_search_node->s_mv.i2_mvy = 0;
1513         ps_search_node->i1_ref_idx = i1_ref_idx;
1514         ps_search_node->u1_is_avail = 0;
1515         ps_search_node->u1_subpel_done = 0;
1516     }
1517 
1518     /* Move to bl */
1519     ps_search_node++;
1520     if(bl_avail == 0)
1521     {
1522         ps_search_node->s_mv.i2_mvx = 0;
1523         ps_search_node->s_mv.i2_mvy = 0;
1524         ps_search_node->i1_ref_idx = i1_ref_idx;
1525         ps_search_node->u1_is_avail = 0;
1526     }
1527     else
1528     {
1529         ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1530         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1531         COPY_MV_TO_SEARCH_NODE(ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
1532     }
1533 }
1534 
hme_get_spatial_candt_in_l1_me(layer_ctxt_t * ps_curr_layer,BLK_SIZE_T e_search_blk_size,S32 i4_blk_x,S32 i4_blk_y,S08 i1_ref_idx,U08 u1_pred_dir,search_node_t * ps_top_neighbours,search_node_t * ps_left_neighbours,S32 i4_result_id,S32 tr_avail,S32 bl_avail,S32 i4_num_act_ref_l0,S32 i4_num_act_ref_l1)1535 void hme_get_spatial_candt_in_l1_me(
1536     layer_ctxt_t *ps_curr_layer,
1537     BLK_SIZE_T e_search_blk_size,
1538     S32 i4_blk_x,
1539     S32 i4_blk_y,
1540     S08 i1_ref_idx,
1541     U08 u1_pred_dir,
1542     search_node_t *ps_top_neighbours,
1543     search_node_t *ps_left_neighbours,
1544     S32 i4_result_id,
1545     S32 tr_avail,
1546     S32 bl_avail,
1547     S32 i4_num_act_ref_l0,
1548     S32 i4_num_act_ref_l1)
1549 {
1550     search_node_t *ps_search_node;
1551     hme_mv_t *ps_mv, *ps_mv_base;
1552 
1553     S32 i4_offset;
1554     S32 mvs_in_blk, mvs_in_row;
1555     S08 *pi1_ref_idx, *pi1_ref_idx_base;
1556     S32 i4_mv_pos_in_implicit_array;
1557 
1558     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1559 
1560     S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
1561     S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
1562     S32 jump = 1;
1563     S32 shift = 0;
1564     S32 i4_num_results_in_given_dir =
1565         ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
1566                             : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
1567 
1568     if(i4_blk_size1 != i4_blk_size2)
1569     {
1570         i4_blk_x <<= 1;
1571         i4_blk_y <<= 1;
1572         jump = 2;
1573         if((i4_blk_size1 << 2) == i4_blk_size2)
1574         {
1575             i4_blk_x <<= 1;
1576             i4_blk_y <<= 1;
1577             jump = 4;
1578         }
1579     }
1580 
1581     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1582     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1583 
1584     /* Adjust the blk coord to point to top left locn */
1585     i4_blk_x -= 1;
1586     i4_blk_y -= 1;
1587     /* Pick up the mvs from the location */
1588     i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1589     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
1590 
1591     i4_offset +=
1592         ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
1593 
1594     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1595     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1596 
1597     ps_mv_base = ps_mv;
1598     pi1_ref_idx_base = pi1_ref_idx;
1599 
1600     /* TL */
1601     {
1602         /* ps_mv and pi1_ref_idx now point to the top left locn */
1603         ps_search_node = ps_top_neighbours;
1604 
1605         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1606             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1607 
1608         if(-1 != i4_mv_pos_in_implicit_array)
1609         {
1610             COPY_MV_TO_SEARCH_NODE(
1611                 ps_search_node,
1612                 &ps_mv[i4_mv_pos_in_implicit_array],
1613                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1614                 i1_ref_idx,
1615                 shift);
1616         }
1617         else
1618         {
1619             ps_search_node->u1_is_avail = 0;
1620             ps_search_node->s_mv.i2_mvx = 0;
1621             ps_search_node->s_mv.i2_mvy = 0;
1622             ps_search_node->i1_ref_idx = i1_ref_idx;
1623         }
1624     }
1625 
1626     /* Move to top */
1627     {
1628         /* ps_mv and pi1_ref_idx now point to the top left locn */
1629         ps_search_node++;
1630         ps_mv += mvs_in_blk;
1631         pi1_ref_idx += mvs_in_blk;
1632 
1633         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1634             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1635 
1636         if(-1 != i4_mv_pos_in_implicit_array)
1637         {
1638             COPY_MV_TO_SEARCH_NODE(
1639                 ps_search_node,
1640                 &ps_mv[i4_mv_pos_in_implicit_array],
1641                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1642                 i1_ref_idx,
1643                 shift);
1644         }
1645         else
1646         {
1647             ps_search_node->u1_is_avail = 0;
1648             ps_search_node->s_mv.i2_mvx = 0;
1649             ps_search_node->s_mv.i2_mvy = 0;
1650             ps_search_node->i1_ref_idx = i1_ref_idx;
1651         }
1652     }
1653 
1654     /* Move to t1 : relevant for 4x4 part searches or for partitions i 16x16 */
1655     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1656     {
1657         ps_search_node++;
1658         ps_mv += (mvs_in_blk * (jump >> 1));
1659         pi1_ref_idx += (mvs_in_blk * (jump >> 1));
1660 
1661         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1662             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1663 
1664         if(-1 != i4_mv_pos_in_implicit_array)
1665         {
1666             COPY_MV_TO_SEARCH_NODE(
1667                 ps_search_node,
1668                 &ps_mv[i4_mv_pos_in_implicit_array],
1669                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1670                 i1_ref_idx,
1671                 shift);
1672         }
1673         else
1674         {
1675             ps_search_node->u1_is_avail = 0;
1676             ps_search_node->s_mv.i2_mvx = 0;
1677             ps_search_node->s_mv.i2_mvy = 0;
1678             ps_search_node->i1_ref_idx = i1_ref_idx;
1679         }
1680     }
1681     else
1682     {
1683         ps_search_node++;
1684         ps_search_node->u1_is_avail = 0;
1685         ps_search_node->s_mv.i2_mvx = 0;
1686         ps_search_node->s_mv.i2_mvy = 0;
1687         ps_search_node->i1_ref_idx = i1_ref_idx;
1688     }
1689 
1690     /* Move to tr: this will be tr w.r.t. the blk being searched */
1691     ps_search_node++;
1692     if(tr_avail == 0)
1693     {
1694         ps_search_node->s_mv.i2_mvx = 0;
1695         ps_search_node->s_mv.i2_mvy = 0;
1696         ps_search_node->i1_ref_idx = i1_ref_idx;
1697         ps_search_node->u1_is_avail = 0;
1698         ps_search_node->u1_subpel_done = 0;
1699     }
1700     else
1701     {
1702         /* ps_mv and pi1_ref_idx now point to the top left locn */
1703         ps_mv = ps_mv_base + (mvs_in_blk * (1 + jump));
1704         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_blk * (1 + jump));
1705 
1706         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1707             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1708 
1709         if(-1 != i4_mv_pos_in_implicit_array)
1710         {
1711             COPY_MV_TO_SEARCH_NODE(
1712                 ps_search_node,
1713                 &ps_mv[i4_mv_pos_in_implicit_array],
1714                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1715                 i1_ref_idx,
1716                 shift);
1717         }
1718         else
1719         {
1720             ps_search_node->u1_is_avail = 0;
1721             ps_search_node->s_mv.i2_mvx = 0;
1722             ps_search_node->s_mv.i2_mvy = 0;
1723             ps_search_node->i1_ref_idx = i1_ref_idx;
1724         }
1725     }
1726 
1727     /* Move to left */
1728     {
1729         /* ps_mv and pi1_ref_idx now point to the top left locn */
1730         ps_search_node = ps_left_neighbours;
1731         ps_mv = ps_mv_base + mvs_in_row;
1732         pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1733 
1734         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1735             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1736 
1737         if(-1 != i4_mv_pos_in_implicit_array)
1738         {
1739             COPY_MV_TO_SEARCH_NODE(
1740                 ps_search_node,
1741                 &ps_mv[i4_mv_pos_in_implicit_array],
1742                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1743                 i1_ref_idx,
1744                 shift);
1745         }
1746         else
1747         {
1748             ps_search_node->u1_is_avail = 0;
1749             ps_search_node->s_mv.i2_mvx = 0;
1750             ps_search_node->s_mv.i2_mvy = 0;
1751             ps_search_node->i1_ref_idx = i1_ref_idx;
1752         }
1753     }
1754 
1755     /* Move to l1 */
1756     if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
1757     {
1758         /* ps_mv and pi1_ref_idx now point to the top left locn */
1759         ps_search_node++;
1760         ps_mv += (mvs_in_row * (jump >> 1));
1761         pi1_ref_idx += (mvs_in_row * (jump >> 1));
1762 
1763         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1764             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1765 
1766         if(-1 != i4_mv_pos_in_implicit_array)
1767         {
1768             COPY_MV_TO_SEARCH_NODE(
1769                 ps_search_node,
1770                 &ps_mv[i4_mv_pos_in_implicit_array],
1771                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1772                 i1_ref_idx,
1773                 shift);
1774         }
1775         else
1776         {
1777             ps_search_node->u1_is_avail = 0;
1778             ps_search_node->s_mv.i2_mvx = 0;
1779             ps_search_node->s_mv.i2_mvy = 0;
1780             ps_search_node->i1_ref_idx = i1_ref_idx;
1781         }
1782     }
1783     else
1784     {
1785         ps_search_node++;
1786         ps_search_node->u1_is_avail = 0;
1787         ps_search_node->s_mv.i2_mvx = 0;
1788         ps_search_node->s_mv.i2_mvy = 0;
1789         ps_search_node->i1_ref_idx = i1_ref_idx;
1790     }
1791 
1792     /* Move to bl */
1793     ps_search_node++;
1794     if(bl_avail == 0)
1795     {
1796         ps_search_node->s_mv.i2_mvx = 0;
1797         ps_search_node->s_mv.i2_mvy = 0;
1798         ps_search_node->i1_ref_idx = i1_ref_idx;
1799         ps_search_node->u1_is_avail = 0;
1800     }
1801     else
1802     {
1803         /* ps_mv and pi1_ref_idx now point to the top left locn */
1804         ps_mv = ps_mv_base + (mvs_in_row * (1 + jump));
1805         pi1_ref_idx = pi1_ref_idx_base + (mvs_in_row * (1 + jump));
1806 
1807         i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
1808             pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
1809 
1810         if(-1 != i4_mv_pos_in_implicit_array)
1811         {
1812             COPY_MV_TO_SEARCH_NODE(
1813                 ps_search_node,
1814                 &ps_mv[i4_mv_pos_in_implicit_array],
1815                 &pi1_ref_idx[i4_mv_pos_in_implicit_array],
1816                 i1_ref_idx,
1817                 shift);
1818         }
1819         else
1820         {
1821             ps_search_node->u1_is_avail = 0;
1822             ps_search_node->s_mv.i2_mvx = 0;
1823             ps_search_node->s_mv.i2_mvy = 0;
1824             ps_search_node->i1_ref_idx = i1_ref_idx;
1825         }
1826     }
1827 }
1828 
1829 /**
1830 ********************************************************************************
1831 *  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
1832 *                                   S32 i4_blk_x,
1833 *                                   S32 i4_blk_y,
1834 *                                   mvgrid_t *ps_mv_grid ,
1835 *                                   S32 i1_ref_id)
1836 *
1837 *  @brief  The 18x18 MV grid for a ctb, is filled in first row and 1st col
1838 *          this corresponds to neighbours (TL, T, TR, L, BL)
1839 *
1840 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
1841 *
1842 *  @param[in] blk_x : x coordinate of the block in mv bank
1843 *
1844 *  @param[in] blk_y : y coordinate of the block in mv bank
1845 *
1846 *  @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
1847 *
1848 *  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
1849 *              results, useful if multiple ref idx candts maintained separately.
1850 *
1851 *  @return void
1852 ********************************************************************************
1853 */
hme_fill_ctb_neighbour_mvs(layer_ctxt_t * ps_curr_layer,S32 blk_x,S32 blk_y,mv_grid_t * ps_mv_grid,U08 u1_pred_dir_ctr,U08 u1_default_ref_id,S32 i4_num_act_ref_l0)1854 void hme_fill_ctb_neighbour_mvs(
1855     layer_ctxt_t *ps_curr_layer,
1856     S32 blk_x,
1857     S32 blk_y,
1858     mv_grid_t *ps_mv_grid,
1859     U08 u1_pred_dir_ctr,
1860     U08 u1_default_ref_id,
1861     S32 i4_num_act_ref_l0)
1862 {
1863     search_node_t *ps_grid_node;
1864     layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
1865     S32 i4_offset;
1866     hme_mv_t *ps_mv, *ps_mv_base;
1867     S08 *pi1_ref_idx, *pi1_ref_idx_base;
1868     S32 jump = 0, inc, i, mvs_in_blk, mvs_in_row;
1869 
1870     if(ps_layer_mvbank->e_blk_size == BLK_4x4)
1871     {
1872         /* searching 16x16, mvs are for 4x4 */
1873         jump = 1;
1874         blk_x <<= 2;
1875         blk_y <<= 2;
1876     }
1877     else
1878     {
1879         /* Searching 16x16, mvs are for 8x8 */
1880         blk_x <<= 1;
1881         blk_y <<= 1;
1882     }
1883     ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
1884 
1885     mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
1886     mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
1887 
1888     /* Adjust the blk coord to point to top left locn */
1889     blk_x -= 1;
1890     blk_y -= 1;
1891 
1892     /* Pick up the mvs from the location */
1893     i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
1894     i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
1895 
1896     i4_offset += (u1_pred_dir_ctr == 1);
1897 
1898     ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
1899     pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
1900 
1901     ps_mv_base = ps_mv;
1902     pi1_ref_idx_base = pi1_ref_idx;
1903 
1904     /* the 0, 0 entry of the grid pts to top left for the ctb */
1905     ps_grid_node = &ps_mv_grid->as_node[0];
1906 
1907     /* Copy 18 mvs at 4x4 level including top left, 16 top mvs for ctb, 1 tr */
1908     for(i = 0; i < 18; i++)
1909     {
1910         COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1911         ps_grid_node++;
1912         inc = 1;
1913         /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1914         if(i & 1)
1915             inc = jump;
1916 
1917         ps_mv += (mvs_in_blk * inc);
1918         pi1_ref_idx += (mvs_in_blk * inc);
1919     }
1920 
1921     ps_mv = ps_mv_base + mvs_in_row;
1922     pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
1923 
1924     /* now copy left 16 left mvs */
1925     ps_grid_node = &ps_mv_grid->as_node[0];
1926     ps_grid_node += (ps_mv_grid->i4_stride);
1927     for(i = 0; i < 16; i++)
1928     {
1929         COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
1930         ps_grid_node += ps_mv_grid->i4_stride;
1931         inc = 1;
1932         /* If blk size is 8x8, then every 2 grid nodes are updated with same mv */
1933         if(!(i & 1))
1934             inc = jump;
1935 
1936         ps_mv += (mvs_in_row * inc);
1937         pi1_ref_idx += (mvs_in_row * inc);
1938     }
1939     /* last one set to invalid as bottom left not yet encoded */
1940     ps_grid_node->u1_is_avail = 0;
1941 }
1942 
/**
********************************************************************************
*  @fn    void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
*
*  @brief  Resets the usage count of the working memory manager to zero
*
*  @param[in,out] ps_buf_mgr : buffer manager whose i4_used is cleared
*
*  @return void
********************************************************************************
*/
void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
{
    /* Only the usage count is touched; base pointer and total are retained */
    ps_buf_mgr->i4_used = 0;
}
/**
********************************************************************************
*  @fn    void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
*
*  @brief  Attaches a memory region to the working memory manager and marks
*          it as completely unused
*
*  @param[out] ps_buf_mgr : buffer manager to be initialised
*
*  @param[in] pu1_mem : start of the memory region, stored as is
*
*  @param[in] size : value recorded as the total of the region
*
*  @return void
********************************************************************************
*/
void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
{
    /* Record the extent of the region and where it starts */
    ps_buf_mgr->i4_total = size;
    ps_buf_mgr->pu1_wkg_mem = pu1_mem;

    /* Start with nothing consumed */
    hme_reset_wkg_mem(ps_buf_mgr);
}
1953 
/**
********************************************************************************
*  @fn    void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
*
*  @brief  Sets up stride and start offset of the ctb mv grid and marks the
*          16x16 nodes beginning at the start offset as available
*
*  @param[out] ps_mv_grid : grid to be initialised
*
*  @return void
********************************************************************************
*/
void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
{
    search_node_t *ps_row;
    S32 i4_row, i4_col;

    /*************************************************************************/
    /* Worst case is a 64x64 CTB, i.e. 16x16 MVs at 4x4 level. With one      */
    /* neighbour on each side this becomes an 18x18 grid. The neighbour      */
    /* border and the CTB MVs themselves are filled at run time; only the    */
    /* availability of the central 16x16 nodes is fixed here, at init time.  */
    /*************************************************************************/
    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;

    /* The first central node sits one grid row plus the pad into the array */
    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;

    ps_row = &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset];
    for(i4_row = 0; i4_row < 16; i4_row++, ps_row += ps_mv_grid->i4_stride)
    {
        for(i4_col = 0; i4_col < 16; i4_col++)
        {
            ps_row[i4_col].u1_is_avail = 1;
        }
    }
}
/**
********************************************************************************
*  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
*
*  @brief  Pads horizontally to left side. On each line, the pixel at pu1_dst
*          is replicated into the pad_wd bytes immediately before it.
*
*  @param[in,out] pu1_dst : points to the pixel to be repeated on the first
*                 line; the bytes at pu1_dst[-pad_wd] .. pu1_dst[-1] are written
*
*  @param[in] stride : stride of destination buffer
*
*  @param[in] pad_wd : Amt of horizontal padding to be done; nothing is
*             written when this is not positive
*
*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
{
    S32 i;

    /* memset takes an unsigned count, so a non-positive width must not */
    /* reach it; there is nothing to pad in that case anyway            */
    if(pad_wd <= 0)
        return;

    for(i = 0; i < pad_ht; i++)
    {
        memset(pu1_dst - pad_wd, pu1_dst[0], (size_t)pad_wd);
        pu1_dst += stride;
    }
}
/**
********************************************************************************
*  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
*
*  @brief  Pads horizontally to rt side. On each line, the pixel at pu1_dst
*          is replicated into the pad_wd bytes immediately after it.
*
*  @param[in,out] pu1_dst : points to the pixel to be repeated on the first
*                 line; the bytes at pu1_dst[1] .. pu1_dst[pad_wd] are written
*
*  @param[in] stride : stride of destination buffer
*
*  @param[in] pad_wd : Amt of horizontal padding to be done; nothing is
*             written when this is not positive
*
*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
*
*  @return void
********************************************************************************
*/
void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
{
    S32 i;

    /* memset takes an unsigned count, so a non-positive width must not */
    /* reach it; there is nothing to pad in that case anyway            */
    if(pad_wd <= 0)
        return;

    for(i = 0; i < pad_ht; i++)
    {
        memset(pu1_dst + 1, pu1_dst[0], (size_t)pad_wd);
        pu1_dst += stride;
    }
}
/**
********************************************************************************
*  @fn    void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
*
*  @brief  Pads vertically on the top. The line at pu1_dst is copied into each
*          of the pad_ht lines above it.
*
*  @param[in,out] pu1_dst : points to the line to be repeated; the pad_ht
*                 lines preceding it (stride apart) are written
*
*  @param[in] stride : stride of destination buffer
*
*  @param[in] pad_ht : Amt of vertical padding to be done
*
*  @param[in] pad_wd : Number of columns for which vertical padding to be
*             done; nothing is written when this is not positive
*
*  @return void
********************************************************************************
*/
void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
{
    S32 i;

    /* memcpy takes an unsigned count : a negative width would otherwise */
    /* be converted into an enormous copy length                         */
    if(pad_wd <= 0)
        return;

    for(i = 1; i <= pad_ht; i++)
        memcpy(pu1_dst - (i * stride), pu1_dst, (size_t)pad_wd);
}
/**
********************************************************************************
*  @fn    void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
*
*  @brief  Pads vertically on the bot. The line at pu1_dst is copied into each
*          of the pad_ht lines below it.
*
*  @param[in,out] pu1_dst : points to the line to be repeated; the pad_ht
*                 lines following it (stride apart) are written
*
*  @param[in] stride : stride of destination buffer
*
*  @param[in] pad_ht : Amt of vertical padding to be done
*
*  @param[in] pad_wd : Number of columns for which vertical padding to be
*             done; nothing is written when this is not positive
*
*  @return void
********************************************************************************
*/
void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
{
    S32 i;

    /* memcpy takes an unsigned count : a negative width would otherwise */
    /* be converted into an enormous copy length                         */
    if(pad_wd <= 0)
        return;

    for(i = 1; i <= pad_ht; i++)
        memcpy(pu1_dst + (i * stride), pu1_dst, (size_t)pad_wd);
}
2085 
2086 /**
2087 ********************************************************************************
2088 *  @fn    void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer,  S32 pos_x,
2089 *                           S32 pos_y, S32 size)
2090 *
2091 *  @brief  Does weighting of the input in case the search needs to happen
2092 *          with reference frames weighted
2093 *
2094 *  @param[in] ps_curr_layer: layer ctxt
2095 *
2096 *  @param[in] pos_x : x coordinate of the input blk in the picture
2097 *
2098 *  @param[in] pos_y : y coordinate of hte input blk in the picture
2099 *
2100 *  @param[in] size : size of the input block
2101 *
2102 *  @param[in] num_ref : Number of reference frames
2103 *
2104 *  @return void
2105 ********************************************************************************
2106 */
hme_get_wt_inp(layer_ctxt_t * ps_curr_layer,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 dst_stride,S32 pos_x,S32 pos_y,S32 size,S32 num_ref,U08 u1_is_wt_pred_on)2107 void hme_get_wt_inp(
2108     layer_ctxt_t *ps_curr_layer,
2109     wgt_pred_ctxt_t *ps_wt_inp_prms,
2110     S32 dst_stride,
2111     S32 pos_x,
2112     S32 pos_y,
2113     S32 size,
2114     S32 num_ref,
2115     U08 u1_is_wt_pred_on)
2116 {
2117     S32 ref, i, j;
2118     U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
2119     S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
2120     S32 x_count, y_count;
2121 
2122     /* Fixed source */
2123     pu1_src = ps_curr_layer->pu1_inp;
2124 
2125     /* Make sure the start positions of block are inside frame limits */
2126     pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
2127     pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
2128 
2129     pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
2130 
2131     /* In case we handle imcomplete CTBs, we copy only as much as reqd */
2132     /* from input buffers to prevent out of bound accesses. In this    */
2133     /* case, we do padding in x or y or both dirns */
2134     x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
2135     y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
2136 
2137     for(i = 0; i < num_ref + 1; i++)
2138     {
2139         ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
2140     }
2141 
2142     /* Run thro all ref ids */
2143     for(ref = 0; ref < num_ref + 1; ref++)
2144     {
2145         S32 wt, off;
2146         S32 inv_wt;
2147 
2148         pu1_src_tmp = pu1_src;
2149 
2150         /* Each ref id may have differnet wt/offset. */
2151         /* So we have unique inp buf for each ref id */
2152         pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2153 
2154         if(ref == num_ref)
2155         {
2156             /* last ref will be non weighted input */
2157             for(i = 0; i < y_count; i++)
2158             {
2159                 for(j = 0; j < x_count; j++)
2160                 {
2161                     pu1_dst[j] = pu1_src_tmp[j];
2162                 }
2163                 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2164                 pu1_dst += dst_stride;
2165             }
2166         }
2167         else
2168         {
2169             /* Wt and off specific to this ref id */
2170             wt = ps_wt_inp_prms->a_wpred_wt[ref];
2171             inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
2172             off = ps_wt_inp_prms->a_wpred_off[ref];
2173 
2174             /* Generate size*size worth of modified input samples */
2175             for(i = 0; i < y_count; i++)
2176             {
2177                 for(j = 0; j < x_count; j++)
2178                 {
2179                     S32 tmp;
2180 
2181                     /* Since we scale input, we use inverse transform of wt pred */
2182                     //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
2183                     tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
2184                     pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
2185                 }
2186                 pu1_src_tmp += ps_curr_layer->i4_inp_stride;
2187                 pu1_dst += dst_stride;
2188             }
2189         }
2190 
2191         /* Check and do padding in right direction if need be */
2192         pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
2193         if(x_count != size)
2194         {
2195             hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
2196         }
2197 
2198         /* Check and do padding in bottom directino if need be */
2199         if(y_count != size)
2200         {
2201             hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
2202         }
2203     }
2204 }
2205 /**
2206 ****************************************************************************************
2207 *  @fn     hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
2208 *                                pu_result_t *ps_pu_results_inp,
2209 *                                UWORD8 u1_num_results_per_part,
2210 *                                UWORD8 u1_num_best_cand)
2211 *
2212 *  @brief  Does the candidate evaluation across all the current candidates and returns
2213 *           the best two or one candidates across given lists
2214 *
2215 *  @param[in]  - ps_pu_results_inp : Pointer to the input candidates
2216 *              - u1_num_results_per_part: Number of available candidates
2217 *
2218 *  @param[out] - ps_pu_results_dst : Pointer to best PU results
2219 *
2220 ****************************************************************************************
2221 */
hme_pick_best_pu_cand(pu_result_t * ps_pu_results_dst,pu_result_t * ps_pu_results_list0,pu_result_t * ps_pu_results_list1,UWORD8 u1_num_results_per_part_l0,UWORD8 u1_num_results_per_part_l1,UWORD8 u1_candidate_rank)2222 void hme_pick_best_pu_cand(
2223     pu_result_t *ps_pu_results_dst,
2224     pu_result_t *ps_pu_results_list0,
2225     pu_result_t *ps_pu_results_list1,
2226     UWORD8 u1_num_results_per_part_l0,
2227     UWORD8 u1_num_results_per_part_l1,
2228     UWORD8 u1_candidate_rank)
2229 {
2230     struct cand_pos_data
2231     {
2232         U08 u1_cand_list_id;
2233 
2234         U08 u1_cand_id_in_cand_list;
2235     } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2236 
2237     S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
2238     U08 i, j;
2239 
2240     for(i = 0; i < u1_num_results_per_part_l0; i++)
2241     {
2242         ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
2243         as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
2244         as_cand_pos_data[i].u1_cand_list_id = 0;
2245     }
2246 
2247     for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++)
2248     {
2249         ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
2250         as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
2251         as_cand_pos_data[j].u1_cand_list_id = 1;
2252     }
2253 
2254     SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
2255         ai4_costs,
2256         as_cand_pos_data,
2257         u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
2258         struct cand_pos_data);
2259 
2260     if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id)
2261     {
2262         ps_pu_results_dst[0] =
2263             ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2264     }
2265     else
2266     {
2267         ps_pu_results_dst[0] =
2268             ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
2269     }
2270 }
2271 
2272 /* Returns the number of candidates */
hme_tu_recur_cand_harvester(part_type_results_t * ps_cand_container,inter_pu_results_t * ps_pu_data,inter_ctb_prms_t * ps_inter_ctb_prms,S32 i4_part_mask)2273 static S32 hme_tu_recur_cand_harvester(
2274     part_type_results_t *ps_cand_container,
2275     inter_pu_results_t *ps_pu_data,
2276     inter_ctb_prms_t *ps_inter_ctb_prms,
2277     S32 i4_part_mask)
2278 {
2279     part_type_results_t s_cand_data;
2280 
2281     U08 i, j;
2282     PART_ID_T e_part_id;
2283 
2284     S32 i4_num_cands = 0;
2285 
2286     /* 2Nx2N part_type decision part */
2287     if(i4_part_mask & ENABLE_2Nx2N)
2288     {
2289         U08 u1_num_candt_to_pick;
2290 
2291         e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
2292 
2293         ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
2294 
2295         if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N))
2296         {
2297             u1_num_candt_to_pick =
2298                 MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
2299                     ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2300                         ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2301         }
2302         else
2303         {
2304             u1_num_candt_to_pick =
2305                 MIN(1,
2306                     ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
2307                         ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
2308         }
2309 
2310         if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
2311         {
2312             u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
2313         }
2314 
2315         for(i = 0; i < u1_num_candt_to_pick; i++)
2316         {
2317             /* Picks the best two candidates of all the available ones */
2318             hme_pick_best_pu_cand(
2319                 ps_cand_container[i4_num_cands].as_pu_results,
2320                 ps_pu_data->aps_pu_results[0][e_part_id],
2321                 ps_pu_data->aps_pu_results[1][e_part_id],
2322                 ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2323                 ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2324                 i);
2325 
2326             /* Update the other params part_type and total_cost in part_type_results */
2327             ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
2328             ps_cand_container[i4_num_cands].i4_tot_cost =
2329                 ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
2330 
2331             i4_num_cands++;
2332         }
2333     }
2334 
2335     /* SMP */
2336     {
2337         S32 i4_total_cost;
2338 
2339         S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
2340         S32 start_part_type = PRT_2NxN;
2341         S32 best_cost = MAX_32BIT_VAL;
2342         S32 part_type_cnt = 0;
2343 
2344         for(j = 0; j < num_part_types; j++)
2345         {
2346             if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2347             {
2348                 continue;
2349             }
2350 
2351             for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2352             {
2353                 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2354 
2355                 /* Pick the best candidate for the partition acroos lists */
2356                 hme_pick_best_pu_cand(
2357                     &s_cand_data.as_pu_results[i],
2358                     ps_pu_data->aps_pu_results[0][e_part_id],
2359                     ps_pu_data->aps_pu_results[1][e_part_id],
2360                     ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2361                     ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2362                     0);
2363             }
2364 
2365             i4_total_cost =
2366                 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2367 
2368             if(i4_total_cost < best_cost)
2369             {
2370                 /* Stores the index of the best part_type in the sub-catoegory */
2371                 best_cost = i4_total_cost;
2372 
2373                 ps_cand_container[i4_num_cands] = s_cand_data;
2374 
2375                 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2376                 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2377             }
2378 
2379             part_type_cnt++;
2380         }
2381 
2382         i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2383     }
2384 
2385     /* AMP */
2386     {
2387         S32 i4_total_cost;
2388 
2389         S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
2390         S32 start_part_type = PRT_2NxnU;
2391         S32 best_cost = MAX_32BIT_VAL;
2392         S32 part_type_cnt = 0;
2393 
2394         for(j = 0; j < num_part_types; j++)
2395         {
2396             if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
2397             {
2398                 continue;
2399             }
2400 
2401             for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
2402             {
2403                 e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
2404 
2405                 /* Pick the best candidate for the partition acroos lists */
2406                 hme_pick_best_pu_cand(
2407                     &s_cand_data.as_pu_results[i],
2408                     ps_pu_data->aps_pu_results[0][e_part_id],
2409                     ps_pu_data->aps_pu_results[1][e_part_id],
2410                     ps_pu_data->u1_num_results_per_part_l0[e_part_id],
2411                     ps_pu_data->u1_num_results_per_part_l1[e_part_id],
2412                     0);
2413             }
2414 
2415             i4_total_cost =
2416                 s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
2417 
2418             if(i4_total_cost < best_cost)
2419             {
2420                 /* Stores the index of the best part_type in the sub-catoegory */
2421                 best_cost = i4_total_cost;
2422 
2423                 ps_cand_container[i4_num_cands] = s_cand_data;
2424 
2425                 ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
2426                 ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
2427             }
2428 
2429             part_type_cnt++;
2430         }
2431 
2432         i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands;
2433     }
2434 
2435     return i4_num_cands;
2436 }
2437 
2438 /**
2439 *****************************************************************************
2440 *  @fn     hme_decide_part_types(search_results_t *ps_search_results)
2441 *
2442 *  @brief  Does uni/bi evaluation accross various partition types,
2443 *          decides best inter partition types for the CU, compares
2444 *          intra cost and decides the best K results for the CU
2445 *
2446 *          This is called post subpel refinmenent for 16x16s, 8x8s and
2447 *          for post merge evaluation for 32x32,64x64 CUs
2448 *
2449 *  @param[in,out] ps_search_results : Search results data structure
2450 *                 - In : 2 lists of upto 2mvs & refids, active partition mask
2451 *                 - Out: Best results for final rdo evaluation of the cu
2452 *
2453 *  @param[in]     ps_subpel_prms : Sub pel params data structure
2454 *
2455 *
2456 *  @par Description
2457 *    --------------------------------------------------------------------------------
2458 *     Flow:
2459 *            for each category (SMP,AMP,2Nx2N based on part mask)
2460 *            {
2461 *                for each part_type
2462 *                {
2463 *                    for each part
2464 *                        pick best candidate from each list
2465 *                    combine uni part type
2466 *                    update best results for part type
2467 *                }
2468 *                pick the best part type for given category (for SMP & AMP)
2469 *            }
2470 *                    ||
2471 *                    ||
2472 *                    \/
2473 *           Bi-Pred evaluation:
2474 *            for upto 4 best part types
2475 *            {
2476 *                for each part
2477 *                {
2478 *                    compute fixed size had for all uni and remember coeffs
2479 *                    compute bisatd
2480 *                    uni vs bi and gives upto two results
2481 *                    also gives the pt level pred buffer
2482 *                }
2483 *             }
2484 *                    ||
2485 *                    ||
2486 *                    \/
2487 *            select X candidates for tu recursion as per the Note below
2488 *               tu_rec_on_part_type (reuse transform coeffs)
2489 *                    ||
2490 *                    ||
2491 *                    \/
2492 *            insert intra nodes at appropriate result id
2493 *                    ||
2494 *                    ||
2495 *                    \/
2496 *            populate y best resuls for rdo based on preset
2497 *
2498 *     Note :
2499 *     number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2500 *     number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
2501 *     --------------------------------------------------------------------------------
2502 *
2503 *  @return None
2504 ********************************************************************************
2505 */
hme_decide_part_types(inter_cu_results_t * ps_cu_results,inter_pu_results_t * ps_pu_results,inter_ctb_prms_t * ps_inter_ctb_prms,me_frm_ctxt_t * ps_ctxt,ihevce_cmn_opt_func_t * ps_cmn_utils_optimised_function_list,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)2506 void hme_decide_part_types(
2507     inter_cu_results_t *ps_cu_results,
2508     inter_pu_results_t *ps_pu_results,
2509     inter_ctb_prms_t *ps_inter_ctb_prms,
2510     me_frm_ctxt_t *ps_ctxt,
2511     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
2512     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
2513 
2514 )
2515 {
2516     S32 i, j;
2517     S32 i4_part_mask;
2518     ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2519     ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
2520     S32 i4_noise_term;
2521     WORD32 e_part_id;
2522 
2523     PF_SAD_FXN_TU_REC apf_err_compute[4];
2524 
2525     part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
2526     part_type_results_t *ps_part_type_results;
2527 
2528     S32 num_best_cand = 0;
2529     const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
2530 
2531     i4_part_mask = ps_cu_results->i4_part_mask;
2532 
2533     num_best_cand = hme_tu_recur_cand_harvester(
2534         as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
2535 
2536     /* Partition ID for the current PU */
2537     e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
2538 
2539     ps_part_type_results = as_part_type_results;
2540     for(i = 0; i < num_best_cand; i++)
2541     {
2542         hme_compute_pred_and_evaluate_bi(
2543             ps_cu_results,
2544             ps_pu_results,
2545             ps_inter_ctb_prms,
2546             &(ps_part_type_results[i]),
2547             au8_pred_sigmaXSquare[i],
2548             au8_pred_sigmaX[i],
2549             ps_cmn_utils_optimised_function_list,
2550             ps_me_optimised_function_list
2551 
2552         );
2553     }
2554     /* Perform TU_REC on the best candidates selected */
2555     {
2556         WORD32 i4_sad_grid;
2557         WORD32 ai4_tu_split_flag[4];
2558         WORD32 ai4_tu_early_cbf[4];
2559 
2560         WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
2561         WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
2562         WORD16 i2_wght;
2563         WORD32 i4_satd;
2564 
2565         err_prms_t s_err_prms;
2566         err_prms_t *ps_err_prms = &s_err_prms;
2567 
2568         /* Default cost and final idx initialization */
2569         for(i = 0; i < num_best_cand; i++)
2570         {
2571             best_cost[i] = MAX_32BIT_VAL;
2572             ai4_final_idx[i] = -1;
2573         }
2574 
2575         /* Assign the stad function to the err_compute function pointer :
2576         Implemented only for 32x32 and 64x64, hence 16x16 and 8x8 are kept NULL */
2577         apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
2578         apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
2579         apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
2580         apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
2581 
2582         ps_err_prms->pi4_sad_grid = &i4_sad_grid;
2583         ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
2584         ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
2585         ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
2586         ps_err_prms->i4_grid_mask = 1;
2587         ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
2588         ps_err_prms->u1_max_tr_size = 32;
2589 
2590         if(ps_inter_ctb_prms->u1_is_cu_noisy)
2591         {
2592             ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
2593         }
2594 
2595         /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
2596         are disabled by Part_mask */
2597         for(i = 0; i < num_best_cand; i++)
2598         {
2599             part_type_results_t *ps_best_results;
2600             pu_result_t *ps_pu_result;
2601             WORD32 part_type_cost;
2602             WORD32 cand_idx;
2603 
2604             WORD32 pred_dir;
2605             S32 i4_inp_off;
2606 
2607             S32 lambda;
2608             U08 lambda_qshift;
2609             U08 *apu1_inp[MAX_NUM_INTER_PARTS];
2610             S16 ai2_wt[MAX_NUM_INTER_PARTS];
2611             S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
2612             S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
2613 
2614             WORD32 part_type = ps_part_type_results[i].u1_part_type;
2615             WORD32 e_cu_size = ps_cu_results->u1_cu_size;
2616             WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
2617             U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
2618             U08 u1_inp_buf_idx = UCHAR_MAX;
2619 
2620             ps_err_prms->i4_part_mask = i4_part_mask;
2621             ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
2622             ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
2623             ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
2624             ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
2625 
2626             /* Current offset for the present part type */
2627             i4_inp_off = ps_cu_results->i4_inp_offset;
2628 
2629             ps_best_results = &(ps_part_type_results[i]);
2630 
2631             part_type_cost = 0;
2632             lambda = ps_inter_ctb_prms->i4_lamda;
2633             lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
2634 
2635             for(j = 0; j < u1_num_parts; j++)
2636             {
2637                 ps_pu_result = &(ps_best_results->as_pu_results[j]);
2638 
2639                 pred_dir = ps_pu_result->pu.b2_pred_mode;
2640 
2641                 if(PRED_L0 == pred_dir)
2642                 {
2643                     apu1_inp[j] =
2644                         ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
2645                         i4_inp_off;
2646                     ai2_wt[j] =
2647                         ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
2648                             ->s_weight_offset.i2_luma_weight;
2649                     ai4_inv_wt[j] =
2650                         ps_inter_ctb_prms->pi4_inv_wt
2651                             [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2652                     ai4_inv_wt_shift_val[j] =
2653                         ps_inter_ctb_prms->pi4_inv_wt_shift_val
2654                             [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
2655                 }
2656                 else if(PRED_L1 == pred_dir)
2657                 {
2658                     apu1_inp[j] =
2659                         ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
2660                         i4_inp_off;
2661                     ai2_wt[j] =
2662                         ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
2663                             ->s_weight_offset.i2_luma_weight;
2664                     ai4_inv_wt[j] =
2665                         ps_inter_ctb_prms->pi4_inv_wt
2666                             [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2667                     ai4_inv_wt_shift_val[j] =
2668                         ps_inter_ctb_prms->pi4_inv_wt_shift_val
2669                             [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
2670                 }
2671                 else if(PRED_BI == pred_dir)
2672                 {
2673                     apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
2674                     ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
2675                     ai4_inv_wt[j] = i4_default_src_wt;
2676                     ai4_inv_wt_shift_val[j] = 0;
2677                 }
2678                 else
2679                 {
2680                     ASSERT(0);
2681                 }
2682 
2683                 part_type_cost += ps_pu_result->i4_mv_cost;
2684             }
2685 
2686             if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1]))
2687             {
2688                 ps_err_prms->pu1_inp = apu1_inp[0];
2689                 ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
2690                 i2_wght = ai2_wt[0];
2691             }
2692             else
2693             {
2694                 if(1 != ihevce_get_free_pred_buf_indices(
2695                             &u1_inp_buf_idx,
2696                             &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2697                             1))
2698                 {
2699                     ASSERT(0);
2700                 }
2701                 else
2702                 {
2703                     U08 *pu1_dst =
2704                         ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2705                     U08 *pu1_src = apu1_inp[0];
2706                     U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2;
2707                     U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
2708                     U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
2709                     U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
2710 
2711                     ps_cmn_utils_optimised_function_list->pf_copy_2d(
2712                         pu1_dst,
2713                         MAX_CU_SIZE,
2714                         pu1_src,
2715                         ps_inter_ctb_prms->i4_inp_stride,
2716                         u1_pu1_wd,
2717                         u1_pu1_ht);
2718 
2719                     pu1_dst +=
2720                         (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2721                              ? u1_pu1_ht * MAX_CU_SIZE
2722                              : u1_pu1_wd);
2723                     pu1_src =
2724                         apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
2725                                            ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
2726                                            : u1_pu1_wd);
2727 
2728                     ps_cmn_utils_optimised_function_list->pf_copy_2d(
2729                         pu1_dst,
2730                         MAX_CU_SIZE,
2731                         pu1_src,
2732                         ps_inter_ctb_prms->i4_inp_stride,
2733                         u1_pu2_wd,
2734                         u1_pu2_ht);
2735 
2736                     ps_err_prms->pu1_inp =
2737                         ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
2738                     ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
2739                     i2_wght = ai2_wt[1];
2740                 }
2741             }
2742 
2743 #if !DISABLE_TU_RECURSION
2744             i4_satd = apf_err_compute[e_cu_size](
2745                 ps_err_prms,
2746                 lambda,
2747                 lambda_qshift,
2748                 ps_inter_ctb_prms->i4_qstep_ls8,
2749                 ps_ctxt->ps_func_selector);
2750 #else
2751             ps_err_prms->pi4_sad_grid = &i4_satd;
2752 
2753             pf_err_compute(ps_err_prms);
2754 
2755             if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64))
2756             {
2757                 ai4_tu_split_flag[0] = 1;
2758                 ai4_tu_split_flag[1] = 1;
2759                 ai4_tu_split_flag[2] = 1;
2760                 ai4_tu_split_flag[3] = 1;
2761 
2762                 ps_err_prms->i4_tu_split_cost = 0;
2763             }
2764             else
2765             {
2766                 ai4_tu_split_flag[0] = 1;
2767                 ai4_tu_split_flag[1] = 1;
2768                 ai4_tu_split_flag[2] = 1;
2769                 ai4_tu_split_flag[3] = 1;
2770 
2771                 ps_err_prms->i4_tu_split_cost = 0;
2772             }
2773 #endif
2774 
2775 #if UNI_SATD_SCALE
2776             i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
2777 #endif
2778 
2779             if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
2780             {
2781                 ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
2782                 ULWORD64 u8_src_variance, u8_pred_variance;
2783                 unsigned long u4_shift_val;
2784                 S32 i4_bits_req;
2785                 S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
2786 
2787                 if(1 == u1_num_parts)
2788                 {
2789                     u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
2790                     u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
2791 
2792                     if(e_cu_size == CU_8x8)
2793                     {
2794                         PART_ID_T e_part_id = (PART_ID_T)(
2795                             (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
2796                             ((ps_cu_results->u1_y_off & 1) << 1));
2797 
2798                         u4_shift_val = ihevce_calc_stim_injected_variance(
2799                             ps_inter_ctb_prms->pu8_part_src_sigmaX,
2800                             ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2801                             &u8_src_variance,
2802                             ai4_inv_wt[0],
2803                             ai4_inv_wt_shift_val[0],
2804                             ps_inter_ctb_prms->wpred_log_wdc,
2805                             e_part_id);
2806                     }
2807                     else
2808                     {
2809                         u4_shift_val = ihevce_calc_stim_injected_variance(
2810                             ps_inter_ctb_prms->pu8_part_src_sigmaX,
2811                             ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2812                             &u8_src_variance,
2813                             ai4_inv_wt[0],
2814                             ai4_inv_wt_shift_val[0],
2815                             ps_inter_ctb_prms->wpred_log_wdc,
2816                             e_part_id);
2817                     }
2818 
2819                     u8_pred_variance = u8_pred_variance >> u4_shift_val;
2820 
2821                     GETRANGE64(i4_bits_req, u8_pred_variance);
2822 
2823                     if(i4_bits_req > 27)
2824                     {
2825                         u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2826                         u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2827                     }
2828 
2829                     if(u8_src_variance == u8_pred_variance)
2830                     {
2831                         u8_temp_var = (1 << STIM_Q_FORMAT);
2832                     }
2833                     else
2834                     {
2835                         u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2836                         u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2837                         u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2838                                        (u8_pred_variance * u8_pred_variance);
2839                         u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2840                         u8_temp_var = (u8_temp_var / u8_temp_var1);
2841                     }
2842 
2843                     i4_noise_term = (UWORD32)u8_temp_var;
2844 
2845                     ASSERT(i4_noise_term >= 0);
2846 
2847                     i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2848 
2849                     u8_temp_var = i4_satd;
2850                     u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2851                     u8_temp_var += (1 << ((i4_q_level)-1));
2852                     i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2853                 }
2854                 else /*if(e_cu_size <= CU_16x16)*/
2855                 {
2856                     unsigned long temp_shift_val;
2857                     PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
2858                         ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
2859                     };
2860 
2861                     u4_shift_val = ihevce_calc_variance_for_diff_weights(
2862                         ps_inter_ctb_prms->pu8_part_src_sigmaX,
2863                         ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
2864                         &u8_src_variance,
2865                         ai4_inv_wt,
2866                         ai4_inv_wt_shift_val,
2867                         ps_best_results->as_pu_results,
2868                         ps_inter_ctb_prms->wpred_log_wdc,
2869                         ae_part_id,
2870                         gau1_blk_size_to_wd[e_blk_size],
2871                         u1_num_parts,
2872                         1);
2873 
2874                     temp_shift_val = u4_shift_val;
2875 
2876                     u4_shift_val = ihevce_calc_variance_for_diff_weights(
2877                         au8_pred_sigmaX[i],
2878                         au8_pred_sigmaXSquare[i],
2879                         &u8_pred_variance,
2880                         ai4_inv_wt,
2881                         ai4_inv_wt_shift_val,
2882                         ps_best_results->as_pu_results,
2883                         0,
2884                         ae_part_id,
2885                         gau1_blk_size_to_wd[e_blk_size],
2886                         u1_num_parts,
2887                         0);
2888 
2889                     u8_pred_variance = u8_pred_variance >> temp_shift_val;
2890 
2891                     GETRANGE64(i4_bits_req, u8_pred_variance);
2892 
2893                     if(i4_bits_req > 27)
2894                     {
2895                         u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
2896                         u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
2897                     }
2898 
2899                     if(u8_src_variance == u8_pred_variance)
2900                     {
2901                         u8_temp_var = (1 << STIM_Q_FORMAT);
2902                     }
2903                     else
2904                     {
2905                         u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
2906                         u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
2907                         u8_temp_var1 = (u8_src_variance * u8_src_variance) +
2908                                        (u8_pred_variance * u8_pred_variance);
2909                         u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
2910                         u8_temp_var = (u8_temp_var / u8_temp_var1);
2911                     }
2912 
2913                     i4_noise_term = (UWORD32)u8_temp_var;
2914 
2915                     ASSERT(i4_noise_term >= 0);
2916                     ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
2917 
2918                     i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
2919 
2920                     u8_temp_var = i4_satd;
2921                     u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
2922                     u8_temp_var += (1 << ((i4_q_level)-1));
2923                     i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
2924 
2925                     ASSERT(i4_satd >= 0);
2926                 }
2927             }
2928 
2929             if(u1_inp_buf_idx != UCHAR_MAX)
2930             {
2931                 ihevce_set_pred_buf_as_free(
2932                     &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
2933                     u1_inp_buf_idx);
2934             }
2935 
2936             part_type_cost += i4_satd;
2937 
2938             /*Update the best results with the new results */
2939             ps_best_results->i4_tot_cost = part_type_cost;
2940 
2941             ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost;
2942 
2943             ASSERT(ai4_tu_split_flag[0] >= 0);
2944             if(e_cu_size == CU_64x64)
2945             {
2946                 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2947                 ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
2948                 ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
2949                 ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
2950 
2951                 /* Update the TU early cbf flags into the best results structure */
2952                 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2953                 ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
2954                 ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
2955                 ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
2956             }
2957             else
2958             {
2959                 ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
2960                 ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
2961             }
2962 
2963             if(part_type_cost < best_cost[num_best_cand - 1])
2964             {
2965                 /* Push and sort current part type if it is one of the num_best_cand */
2966                 for(cand_idx = 0; cand_idx < i; cand_idx++)
2967                 {
2968                     if(part_type_cost <= best_cost[cand_idx])
2969                     {
2970                         memmove(
2971                             &ai4_final_idx[cand_idx + 1],
2972                             &ai4_final_idx[cand_idx],
2973                             sizeof(WORD32) * (i - cand_idx));
2974                         memmove(
2975                             &best_cost[cand_idx + 1],
2976                             &best_cost[cand_idx],
2977                             sizeof(WORD32) * (i - cand_idx));
2978                         break;
2979                     }
2980                 }
2981 
2982                 ai4_final_idx[cand_idx] = i;
2983                 best_cost[cand_idx] = part_type_cost;
2984             }
2985         }
2986 
2987         ps_cu_results->u1_num_best_results = num_best_cand;
2988 
2989         for(i = 0; i < num_best_cand; i++)
2990         {
2991             ASSERT(ai4_final_idx[i] < num_best_cand);
2992 
2993             if(ai4_final_idx[i] != -1)
2994             {
2995                 memcpy(
2996                     &(ps_cu_results->ps_best_results[i]),
2997                     &(ps_part_type_results[ai4_final_idx[i]]),
2998                     sizeof(part_type_results_t));
2999             }
3000         }
3001     }
3002 
3003     for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++)
3004     {
3005         ihevce_set_pred_buf_as_free(
3006             &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
3007     }
3008 }
3009 
3010 /**
3011 **************************************************************************************************
3012 *  @fn     hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3013 *
3014 *  @brief Does the population of the inter_cu_results structure with the results after the
3015 *           subpel refinement
3016 *
3017 *          This is called post subpel refinement for 16x16s, 8x8s and
3018 *          for post merge evaluation for 32x32,64x64 CUs
3019 *
3020 *  @param[in,out] ps_search_results : Search results data structure
3021 *                 - ps_cu_results : cu_results data structure
3022 *                   ps_pu_result  : Pointer to the memory for storing PU's
3023 *
3024 ****************************************************************************************************
3025 */
hme_populate_pus(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,hme_subpel_prms_t * ps_subpel_prms,search_results_t * ps_search_results,inter_cu_results_t * ps_cu_results,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result,inter_ctb_prms_t * ps_inter_ctb_prms,wgt_pred_ctxt_t * ps_wt_prms,layer_ctxt_t * ps_curr_layer,U08 * pu1_pred_dir_searched,WORD32 i4_num_active_ref)3026 void hme_populate_pus(
3027     me_ctxt_t *ps_thrd_ctxt,
3028     me_frm_ctxt_t *ps_ctxt,
3029     hme_subpel_prms_t *ps_subpel_prms,
3030     search_results_t *ps_search_results,
3031     inter_cu_results_t *ps_cu_results,
3032     inter_pu_results_t *ps_pu_results,
3033     pu_result_t *ps_pu_result,
3034     inter_ctb_prms_t *ps_inter_ctb_prms,
3035     wgt_pred_ctxt_t *ps_wt_prms,
3036     layer_ctxt_t *ps_curr_layer,
3037     U08 *pu1_pred_dir_searched,
3038     WORD32 i4_num_active_ref)
3039 {
3040     WORD32 i, j, k;
3041     WORD32 i4_part_mask;
3042     WORD32 i4_ref;
3043     UWORD8 e_part_id;
3044     pu_result_t *ps_curr_pu;
3045     search_node_t *ps_search_node;
3046     part_attr_t *ps_part_attr;
3047     UWORD8 e_cu_size = ps_search_results->e_cu_size;
3048     WORD32 num_results_per_part_l0 = 0;
3049     WORD32 num_results_per_part_l1 = 0;
3050     WORD32 i4_ref_id;
3051     WORD32 i4_total_act_ref;
3052 
3053     i4_part_mask = ps_search_results->i4_part_mask;
3054 
3055     /* pred_buf_mngr init */
3056     {
3057         hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);
3058 
3059         ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX;
3060 
3061         for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++)
3062         {
3063             ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] =
3064                 ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
3065             ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i);
3066         }
3067 
3068         ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
3069     }
3070 
3071     ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
3072     ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
3073     ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;
3074 
3075     /* Populate the CU level parameters */
3076     ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
3077     ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
3078     ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
3079     ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
3080     ps_cu_results->u1_y_off = ps_search_results->u1_y_off;
3081 
3082     i4_total_act_ref =
3083         ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
3084     /*Populate the partition results
3085     Loop across all the active references that are enabled right now */
3086     for(i = 0; i < MAX_PART_TYPES; i++)
3087     {
3088         if(!(i4_part_mask & gai4_part_type_to_part_mask[i]))
3089         {
3090             continue;
3091         }
3092 
3093         for(j = 0; j < gau1_num_parts_in_part_type[i]; j++)
3094         {
3095             /* Partition ID for the current PU */
3096             e_part_id = (UWORD8)ge_part_type_to_part_id[i][j];
3097             ps_part_attr = &gas_part_attr_in_cu[e_part_id];
3098 
3099             num_results_per_part_l0 = 0;
3100             num_results_per_part_l1 = 0;
3101 
3102             ps_pu_results->aps_pu_results[0][e_part_id] =
3103                 ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST);
3104             ps_pu_results->aps_pu_results[1][e_part_id] =
3105                 ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
3106 
3107             for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
3108             {
3109                 U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
3110 
3111                 for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
3112                 {
3113                     ps_search_node =
3114                         &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k];
3115 
3116                     /* If subpel is done then the node is a valid candidate else break the loop */
3117                     if(ps_search_node->u1_subpel_done)
3118                     {
3119                         i4_ref_id = ps_search_node->i1_ref_idx;
3120 
3121                         ASSERT(i4_ref_id >= 0);
3122 
3123                         /* Check whether current ref_id is past or future and assign the pointers to L0 or L1 list accordingly */
3124                         if(!u1_pred_dir)
3125                         {
3126                             ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] +
3127                                          num_results_per_part_l0;
3128 
3129                             ASSERT(
3130                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
3131                                 ps_inter_ctb_prms->u1_num_active_ref_l0);
3132 
3133                             /* Always populate the ref_idx value in l0_ref_idx */
3134                             ps_curr_pu->pu.mv.i1_l0_ref_idx =
3135                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
3136                             ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
3137                             ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
3138                             ps_curr_pu->pu.b2_pred_mode = PRED_L0;
3139 
3140                             ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] =
3141                                 ps_wt_prms->apu1_wt_inp[i4_ref_id];
3142 
3143                             num_results_per_part_l0++;
3144                         }
3145                         else
3146                         {
3147                             ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] +
3148                                          num_results_per_part_l1;
3149 
3150                             ASSERT(
3151                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
3152                                 ps_inter_ctb_prms->u1_num_active_ref_l1);
3153 
3154                             /* populate the ref_idx value in l1_ref_idx */
3155                             ps_curr_pu->pu.mv.i1_l1_ref_idx =
3156                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
3157                             ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
3158                             ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
3159                             ps_curr_pu->pu.b2_pred_mode = PRED_L1;
3160 
3161                             /* Copy the values from weighted params to common_frm_aprams */
3162                             ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] =
3163                                 ps_wt_prms->apu1_wt_inp[i4_ref_id];
3164 
3165                             num_results_per_part_l1++;
3166                         }
3167                         ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
3168                         ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
3169 
3170 #if UNI_SATD_SCALE
3171                         /*SATD is scaled by weight. Hence rescale the SATD */
3172                         ps_curr_pu->i4_tot_cost =
3173                             ((ps_search_node->i4_sad *
3174                                   ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
3175                               (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
3176                              ps_inter_ctb_prms->wpred_log_wdc) +
3177                             ps_search_node->i4_mv_cost;
3178 #endif
3179 
3180                         /* Packed format of the width and height */
3181                         ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1;
3182                         ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1;
3183 
3184                         ps_curr_pu->pu.b4_pos_x =
3185                             (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >>
3186                              2);
3187                         ps_curr_pu->pu.b4_pos_y =
3188                             (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >>
3189                              2);
3190 
3191                         ps_curr_pu->pu.b1_intra_flag = 0;
3192 
3193                         /* Unweighted input */
3194                         ps_inter_ctb_prms->pu1_non_wt_inp =
3195                             ps_wt_prms->apu1_wt_inp[i4_total_act_ref];
3196 
3197                         ps_search_node++;
3198                     }
3199                     else
3200                     {
3201                         break;
3202                     }
3203                 }
3204             }
3205 
3206             ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0;
3207             ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1;
3208         }
3209     }
3210 }
3211 
3212 /**
3213 *********************************************************************************************************
3214 *  @fn     hme_populate_pus_8x8_cu(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
3215 *
3216 *  @brief Does the population of the inter_cu_results structure with the results after the
3217 *           subpel refinement
3218 *
3219 *          This is called post subpel refinmenent for 16x16s, 8x8s and
3220 *          for post merge evaluation for 32x32,64x64 CUs
3221 *
3222 *  @param[in,out] ps_search_results : Search results data structure
3223 *                 - ps_cu_results : cu_results data structure
3224 *                   ps_pu_results : Pointer for the PU's
3225 *                   ps_pu_result  : Pointer to the memory for storing PU's
3226 *
3227 *********************************************************************************************************
3228 */
hme_populate_pus_8x8_cu(me_ctxt_t * ps_thrd_ctxt,me_frm_ctxt_t * ps_ctxt,hme_subpel_prms_t * ps_subpel_prms,search_results_t * ps_search_results,inter_cu_results_t * ps_cu_results,inter_pu_results_t * ps_pu_results,pu_result_t * ps_pu_result,inter_ctb_prms_t * ps_inter_ctb_prms,U08 * pu1_pred_dir_searched,WORD32 i4_num_active_ref,U08 u1_blk_8x8_mask)3229 void hme_populate_pus_8x8_cu(
3230     me_ctxt_t *ps_thrd_ctxt,
3231     me_frm_ctxt_t *ps_ctxt,
3232     hme_subpel_prms_t *ps_subpel_prms,
3233     search_results_t *ps_search_results,
3234     inter_cu_results_t *ps_cu_results,
3235     inter_pu_results_t *ps_pu_results,
3236     pu_result_t *ps_pu_result,
3237     inter_ctb_prms_t *ps_inter_ctb_prms,
3238     U08 *pu1_pred_dir_searched,
3239     WORD32 i4_num_active_ref,
3240     U08 u1_blk_8x8_mask)
3241 {
3242     WORD32 i, k;
3243     WORD32 i4_part_mask;
3244     WORD32 i4_ref;
3245     pu_result_t *ps_curr_pu;
3246     search_node_t *ps_search_node;
3247     WORD32 i4_ref_id;
3248     WORD32 x_off, y_off;
3249 
3250     /* Make part mask available as only 2Nx2N
3251     Later support for 4x8 and 8x4 needs to be added */
3252     i4_part_mask = ENABLE_2Nx2N;
3253 
3254     x_off = ps_search_results->u1_x_off;
3255     y_off = ps_search_results->u1_y_off;
3256 
3257     for(i = 0; i < 4; i++)
3258     {
3259         if(u1_blk_8x8_mask & (1 << i))
3260         {
3261             UWORD8 u1_x_pos, u1_y_pos;
3262 
3263             WORD32 num_results_per_part_l0 = 0;
3264             WORD32 num_results_per_part_l1 = 0;
3265 
3266             ps_cu_results->u1_cu_size = CU_8x8;
3267             ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
3268             ps_cu_results->i4_part_mask = i4_part_mask;
3269             ps_cu_results->u1_x_off = x_off + (i & 1) * 8;
3270             ps_cu_results->u1_y_off = y_off + (i >> 1) * 8;
3271             ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64);
3272 
3273             ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
3274             ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3275 
3276             u1_x_pos = ps_cu_results->u1_x_off >> 2;
3277             u1_y_pos = ps_cu_results->u1_y_off >> 2;
3278 
3279             if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
3280             {
3281                 ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];
3282 
3283                 ps_cu_results->i4_part_mask = 0;
3284                 ps_cu_results->u1_num_best_results = 0;
3285 
3286                 ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3287 
3288                 ps_curr_pu->pu.b4_wd = 1;
3289                 ps_curr_pu->pu.b4_ht = 1;
3290                 ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3291                 ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3292                 ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
3293 
3294                 ps_cu_results++;
3295                 ps_pu_results++;
3296 
3297                 continue;
3298             }
3299 
3300             ps_pu_results->aps_pu_results[0][0] =
3301                 ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
3302             ps_pu_results->aps_pu_results[1][0] =
3303                 ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
3304 
3305             for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
3306             {
3307                 U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
3308 
3309                 /* Select the NxN partition node for the current ref_idx in the search results*/
3310                 ps_search_node =
3311                     ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];
3312 
3313                 for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
3314                 {
3315                     /* If subpel is done then the node is a valid candidate else break the loop */
3316                     if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
3317                     {
3318                         i4_ref_id = ps_search_node->i1_ref_idx;
3319 
3320                         ASSERT(i4_ref_id >= 0);
3321 
3322                         if(!u1_pred_dir)
3323                         {
3324                             ps_curr_pu =
3325                                 ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3326 
3327                             ASSERT(
3328                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
3329                                 ps_inter_ctb_prms->u1_num_active_ref_l0);
3330 
3331                             ps_curr_pu->pu.mv.i1_l0_ref_idx =
3332                                 ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
3333                             ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
3334                             ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
3335                             ps_curr_pu->pu.b2_pred_mode = PRED_L0;
3336 
3337                             num_results_per_part_l0++;
3338                         }
3339                         else
3340                         {
3341                             ps_curr_pu =
3342                                 ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3343 
3344                             ASSERT(
3345                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
3346                                 ps_inter_ctb_prms->u1_num_active_ref_l1);
3347 
3348                             ps_curr_pu->pu.mv.i1_l1_ref_idx =
3349                                 ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
3350                             ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
3351                             ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
3352                             ps_curr_pu->pu.b2_pred_mode = PRED_L1;
3353 
3354                             num_results_per_part_l1++;
3355                         }
3356                         ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
3357                         ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
3358 
3359 #if UNI_SATD_SCALE
3360                         /*SATD is scaled by weight. Hence rescale the SATD */
3361                         ps_curr_pu->i4_tot_cost =
3362                             ((ps_search_node->i4_sad *
3363                                   ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
3364                               (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
3365                              ps_inter_ctb_prms->wpred_log_wdc) +
3366                             ps_search_node->i4_mv_cost;
3367 #endif
3368 
3369                         ps_curr_pu->pu.b4_wd = 1;
3370                         ps_curr_pu->pu.b4_ht = 1;
3371                         ps_curr_pu->pu.b4_pos_x = u1_x_pos;
3372                         ps_curr_pu->pu.b4_pos_y = u1_y_pos;
3373                         ps_curr_pu->pu.b1_intra_flag = 0;
3374 
3375                         ps_search_node++;
3376                     }
3377                     else
3378                     {
3379                         /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
3380                         to remove 8x8's as possible candidates during evaluation */
3381 
3382                         ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
3383 
3384                         ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3385 
3386                         ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
3387 
3388                         ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
3389 
3390                         break;
3391                     }
3392                 }
3393             }
3394 
3395             /* Update the num_results per_part across lists L0 and L1 */
3396             ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
3397             ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
3398         }
3399         ps_cu_results++;
3400         ps_pu_results++;
3401     }
3402 }
3403 
3404 /**
3405 ********************************************************************************
3406 *  @fn     hme_insert_intra_nodes_post_bipred
3407 *
3408 *  @brief  Compares intra costs (populated by IPE) with the best inter costs
3409 *          (populated after evaluating bi-pred) and updates the best results
3410 *          if intra cost is better
3411 *
3412 *  @param[in,out]  ps_cu_results    [inout] : Best results structure of CU
3413 *                  ps_cur_ipe_ctb   [in]    : intra results for the current CTB
3414 *                  i4_frm_qstep     [in]    : current frame quantizer(qscale)*
3415 *
3416 *  @return None
3417 ********************************************************************************
3418 */
hme_insert_intra_nodes_post_bipred(inter_cu_results_t * ps_cu_results,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb,WORD32 i4_frm_qstep)3419 void hme_insert_intra_nodes_post_bipred(
3420     inter_cu_results_t *ps_cu_results,
3421     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
3422     WORD32 i4_frm_qstep)
3423 {
3424     WORD32 i;
3425     WORD32 num_results;
3426     WORD32 cu_size = ps_cu_results->u1_cu_size;
3427     UWORD8 u1_x_off = ps_cu_results->u1_x_off;
3428     UWORD8 u1_y_off = ps_cu_results->u1_y_off;
3429 
3430     /* Id of the 32x32 block, 16x16 block in a CTB */
3431     WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
3432     WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
3433 
3434     /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
3435     WORD32 disable_intra64 = 0;
3436     WORD32 disable_intra32 = 0;
3437     WORD32 disable_intra16 = 0;
3438 
3439     S32 i4_intra_2nx2n_cost;
3440 
3441     /* ME final results for this CU (post seeding of best uni/bi pred results) */
3442     part_type_results_t *ps_best_result;
3443 
3444     i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;
3445 
3446     /*If inter candidates are enabled then enter the for loop to update the intra candidate */
3447 
3448     if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
3449     {
3450         ps_cu_results->u1_num_best_results = 1;
3451     }
3452 
3453     num_results = ps_cu_results->u1_num_best_results;
3454 
3455     ps_best_result = &ps_cu_results->ps_best_results[0];
3456 
3457     /* Disable intra16/32/64 flags based on split flags recommended by IPE */
3458     if(ps_cur_ipe_ctb->u1_split_flag)
3459     {
3460         disable_intra64 = 1;
3461         if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
3462         {
3463             disable_intra32 = 1;
3464 
3465             if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
3466                    .as_intra16_analyse[i4_16x16_id]
3467                    .b1_split_flag)
3468             {
3469                 disable_intra16 = 1;
3470             }
3471         }
3472     }
3473 
3474     /* Derive the intra cost based on current cu size and offset */
3475     switch(cu_size)
3476     {
3477     case CU_8x8:
3478     {
3479         i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];
3480 
3481         /* Accounting for coding noise in the open loop IPE cost */
3482         i4_intra_2nx2n_cost +=
3483             ((i4_frm_qstep * 16) >> 2) /*+ ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3484 
3485         break;
3486     }
3487 
3488     case CU_16x16:
3489     {
3490         i4_intra_2nx2n_cost =
3491             ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
3492 
3493         /* Accounting for coding noise in the open loop IPE cost */
3494         i4_intra_2nx2n_cost +=
3495             ((i4_frm_qstep * 16)); /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */
3496 
3497         if(disable_intra16)
3498         {
3499             /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
3500             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3501         }
3502         break;
3503     }
3504 
3505     case CU_32x32:
3506     {
3507         i4_intra_2nx2n_cost =
3508             ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
3509 
3510         /* Accounting for coding noise in the open loop IPE cost */
3511         i4_intra_2nx2n_cost +=
3512             (i4_frm_qstep * 16 * 4) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3513 
3514         if(disable_intra32)
3515         {
3516             /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
3517             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3518         }
3519         break;
3520     }
3521 
3522     case CU_64x64:
3523     {
3524         i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
3525 
3526         /* Accounting for coding noise in the open loop IPE cost */
3527         i4_intra_2nx2n_cost +=
3528             (i4_frm_qstep * 16 * 16) /* + ((i4_frm_qstep*i4_intra_2nx2n_cost)/256) */;
3529 
3530         if(disable_intra64)
3531         {
3532             /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
3533             i4_intra_2nx2n_cost = MAX_32BIT_VAL;
3534         }
3535         break;
3536     }
3537 
3538     default:
3539         ASSERT(0);
3540     }
3541 
3542     {
3543         /*****************************************************************/
3544         /* Intra / Inter cost comparison for  2Nx2N : cu size 8/16/32/64 */
3545         /* Identify where the current result isto be placed. Basically   */
3546         /* find the node which has cost just higher than node under test */
3547         /*****************************************************************/
3548         for(i = 0; i < num_results; i++)
3549         {
3550             /* Subtrqact the tu_spli_flag_cost from total_inter_cost for fair comparision */
3551             WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
3552 
3553             if(i4_intra_2nx2n_cost < inter_cost)
3554             {
3555                 if(i < (num_results - 1))
3556                 {
3557                     memmove(
3558                         ps_best_result + i + 1,
3559                         ps_best_result + i,
3560                         sizeof(ps_best_result[0]) * (num_results - 1 - i));
3561                 }
3562 
3563                 /* Insert the intra node result */
3564                 ps_best_result[i].u1_part_type = PRT_2Nx2N;
3565                 ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
3566                 ps_best_result[i].ai4_tu_split_flag[0] = 0;
3567                 ps_best_result[i].ai4_tu_split_flag[1] = 0;
3568                 ps_best_result[i].ai4_tu_split_flag[2] = 0;
3569                 ps_best_result[i].ai4_tu_split_flag[3] = 0;
3570 
3571                 /* Populate intra flag, cost and default mvs, refidx for intra pu */
3572                 ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
3573                 //ps_best_result[i].as_pu_results[0].i4_sad = i4_intra_2nx2n_cost;
3574                 ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
3575                 ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
3576                 ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
3577                 ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
3578                 ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
3579                 ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
3580                 ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
3581                 ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
3582 
3583                 break;
3584             }
3585         }
3586     }
3587 }
3588 
hme_recompute_lambda_from_min_8x8_act_in_ctb(me_frm_ctxt_t * ps_ctxt,ipe_l0_ctb_analyse_for_me_t * ps_cur_ipe_ctb)3589 S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
3590     me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
3591 {
3592     double lambda;
3593     double lambda_modifier;
3594     WORD32 i4_cu_qp;
3595     frm_lambda_ctxt_t *ps_frm_lambda_ctxt;
3596     //ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
3597     WORD32 i4_frame_qp;
3598     rc_quant_t *ps_rc_quant_ctxt;
3599     WORD32 i4_is_bpic;
3600 
3601     ps_frm_lambda_ctxt = &ps_ctxt->s_frm_lambda_ctxt;
3602     //ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base;
3603     i4_frame_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
3604     ps_rc_quant_ctxt = ps_ctxt->ps_rc_quant_ctxt;
3605     i4_is_bpic = ps_ctxt->s_frm_prms.bidir_enabled;
3606 
3607     i4_cu_qp = ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset];
3608 
3609     {
3610         if(ps_ctxt->i4_l0me_qp_mod)
3611         {
3612 #if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
3613 #if LAMDA_BASED_ON_QUANT
3614             WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
3615 #else
3616             WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
3617 #endif
3618             i4_cu_qp = (((i4_cu_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
3619                        QP_LEVEL_MOD_ACT_FACTOR;
3620 
3621 #endif
3622         }
3623         if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qscale)
3624             i4_cu_qp = ps_rc_quant_ctxt->i2_max_qscale;
3625         else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qscale)
3626             i4_cu_qp = ps_rc_quant_ctxt->i2_min_qscale;
3627 
3628         i4_cu_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_cu_qp];
3629     }
3630 
3631     if(i4_cu_qp > ps_rc_quant_ctxt->i2_max_qp)
3632         i4_cu_qp = ps_rc_quant_ctxt->i2_max_qp;
3633     else if(i4_cu_qp < ps_rc_quant_ctxt->i2_min_qp)
3634         i4_cu_qp = ps_rc_quant_ctxt->i2_min_qp;
3635 
3636     lambda = pow(2.0, (((double)(i4_cu_qp - 12)) / 3));
3637 
3638     lambda_modifier = ps_frm_lambda_ctxt->lambda_modifier;
3639 
3640     if(i4_is_bpic)
3641     {
3642         lambda_modifier = lambda_modifier * CLIP3((((double)(i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
3643     }
3644     if(ps_ctxt->i4_use_const_lamda_modifier)
3645     {
3646         if(ps_ctxt->s_frm_prms.is_i_pic)
3647         {
3648             lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
3649         }
3650         else
3651         {
3652             lambda_modifier = CONST_LAMDA_MOD_VAL;
3653         }
3654     }
3655     lambda *= lambda_modifier;
3656 
3657     return ((WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)));
3658 }
3659 
3660 /**
3661 ********************************************************************************
3662 *  @fn     hme_update_dynamic_search_params
3663 *
3664 *  @brief  Update the Dynamic search params based on the current MVs
3665 *
3666 *  @param[in,out]  ps_dyn_range_prms    [inout] : Dyn. Range Param str.
3667 *                  i2_mvy               [in]    : current MV y comp.
3668 *
3669 *  @return None
3670 ********************************************************************************
3671 */
hme_update_dynamic_search_params(dyn_range_prms_t * ps_dyn_range_prms,WORD16 i2_mvy)3672 void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
3673 {
3674     /* If MV is up large, update i2_dyn_max_y */
3675     if(i2_mvy > ps_dyn_range_prms->i2_dyn_max_y)
3676         ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
3677     /* If MV is down large, update i2_dyn_min_y */
3678     if(i2_mvy < ps_dyn_range_prms->i2_dyn_min_y)
3679         ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
3680 }
3681 
/**
********************************************************************************
*  @fn     hme_add_new_node_to_a_sorted_array
*
*  @brief  Inserts ps_result_node into pps_sorted_array ahead of the first entry
*          whose cost is strictly greater, or at the end if there is none.
*          Entries from the insert position onwards move up by one, so the
*          array (and pu1_shifts, when given) must have room for
*          u4_num_results_updated + 1 entries.
*
*          When pu1_shifts is non-NULL, costs are compared after a rounded
*          right shift by the per entry shift, and u1_shift is stored in
*          pu1_shifts alongside the new node.
*
*  @param[in]      ps_result_node         : Node to be inserted
*  @param[in,out]  pps_sorted_array       : Array of node pointers
*  @param[in,out]  pu1_shifts             : Per entry cost shifts; may be NULL
*  @param[in]      u4_num_results_updated : Number of entries currently in the array
*  @param[in]      u1_shift               : Cost shift of the new node; not used
*                                           when pu1_shifts is NULL
*
*  @return None
********************************************************************************
*/
void hme_add_new_node_to_a_sorted_array(
    search_node_t *ps_result_node,
    search_node_t **pps_sorted_array,
    U08 *pu1_shifts,
    U32 u4_num_results_updated,
    U08 u1_shift)
{
    U32 u4_pos = 0;
    U32 u4_num_to_move;

    /* Step 1 : locate the insert position */
    if(NULL != pu1_shifts)
    {
        S32 i4_new_cost;

        if(0 == u1_shift)
        {
            i4_new_cost = ps_result_node->i4_tot_cost;
        }
        else
        {
            i4_new_cost = (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
        }

        while(u4_pos < u4_num_results_updated)
        {
            S32 i4_entry_cost;
            U08 u1_entry_shift = pu1_shifts[u4_pos];

            if(0 == u1_entry_shift)
            {
                i4_entry_cost = pps_sorted_array[u4_pos]->i4_tot_cost;
            }
            else
            {
                i4_entry_cost =
                    (pps_sorted_array[u4_pos]->i4_tot_cost + (1 << (u1_entry_shift - 1))) >>
                    u1_entry_shift;
            }

            if(i4_new_cost < i4_entry_cost)
            {
                break;
            }

            u4_pos++;
        }
    }
    else
    {
        S32 i4_new_cost = ps_result_node->i4_tot_cost;

        while(u4_pos < u4_num_results_updated)
        {
            if(i4_new_cost < pps_sorted_array[u4_pos]->i4_tot_cost)
            {
                break;
            }

            u4_pos++;
        }
    }

    /* Step 2 : open up a slot at the insert position */
    u4_num_to_move = u4_num_results_updated - u4_pos;

    if(u4_num_to_move > 0)
    {
        memmove(
            &pps_sorted_array[u4_pos + 1],
            &pps_sorted_array[u4_pos],
            u4_num_to_move * sizeof(search_node_t *));

        if(NULL != pu1_shifts)
        {
            memmove(&pu1_shifts[u4_pos + 1], &pu1_shifts[u4_pos], u4_num_to_move * sizeof(U08));
        }
    }

    /* Step 3 : drop the new node (and its shift) into the slot */
    if(NULL != pu1_shifts)
    {
        pu1_shifts[u4_pos] = u1_shift;
    }

    pps_sorted_array[u4_pos] = ps_result_node;
}
3739 
/**
********************************************************************************
*  @fn     hme_find_pos_of_implicitly_stored_ref_id
*
*  @brief  Finds the position of the (i4_result_id + 1)th entry of pi1_ref_idx
*          that equals i1_ref_idx
*
*  @param[in]  pi1_ref_idx     : Array of ref idx values to be scanned
*              i1_ref_idx      : Ref idx value to look for
*              i4_result_id    : Number of matching entries to skip first
*              i4_num_results  : Number of entries in pi1_ref_idx
*
*  @return Index of the requested match, -1 if there are not enough matches
********************************************************************************
*/
S32 hme_find_pos_of_implicitly_stored_ref_id(
    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
{
    S32 i4_pos;
    S32 i4_matches_to_skip = i4_result_id;

    for(i4_pos = 0; i4_pos < i4_num_results; i4_pos++)
    {
        if(pi1_ref_idx[i4_pos] != i1_ref_idx)
        {
            continue;
        }

        if(0 == i4_matches_to_skip)
        {
            return i4_pos;
        }

        /* A match, but not the one asked for yet */
        i4_matches_to_skip--;
    }

    return -1;
}
3762 
/**
********************************************************************************
*  @fn     hme_search_node_populator
*
*  @brief  Initialises a search node from a stored MV: both MV components are
*          passed through SHL_NEG with i1_mv_magnitude_shift and written via
*          the node's ps_mv pointer; the node is then marked as available with
*          subpel not yet done
*
*  @param[out] ps_search_node         : Node to be populated
*  @param[in]  ps_mv                  : Source MV
*              i1_ref_idx             : Ref idx stored in the node
*              i1_mv_magnitude_shift  : Shift handed to SHL_NEG
*
*  @return None
********************************************************************************
*/
static __inline void hme_search_node_populator(
    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
{
    WORD16 i2_src_x;
    WORD16 i2_src_y;

    /* Horizontal component */
    i2_src_x = (WORD16)ps_mv->i2_mv_x;
    ps_search_node->ps_mv->i2_mvx = SHL_NEG(i2_src_x, i1_mv_magnitude_shift);

    /* Vertical component */
    i2_src_y = (WORD16)ps_mv->i2_mv_y;
    ps_search_node->ps_mv->i2_mvy = SHL_NEG(i2_src_y, i1_mv_magnitude_shift);

    /* Node state: a valid candidate that has not been subpel refined */
    ps_search_node->u1_subpel_done = 0;
    ps_search_node->u1_is_avail = 1;
    ps_search_node->i1_ref_idx = i1_ref_idx;
}
3772 
hme_populate_search_candidates(fpel_srch_cand_init_data_t * ps_ctxt)3773 S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
3774 {
3775     hme_mv_t *ps_mv;
3776 
3777     S32 wd_c, ht_c, wd_p, ht_p;
3778     S32 blksize_p, blksize_c;
3779     S32 i;
3780     S08 *pi1_ref_idx;
3781     /* Cache for storing offsets */
3782     S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];
3783 
3784     layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
3785     layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
3786     layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
3787     layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
3788     search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
3789     hme_mv_t s_zero_mv = { 0 };
3790 
3791     S32 i4_pos_x = ps_ctxt->i4_pos_x;
3792     S32 i4_pos_y = ps_ctxt->i4_pos_y;
3793     S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
3794     S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
3795     U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
3796     U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
3797     U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
3798     U08 u1_num_results_in_coarse_mvbank =
3799         (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
3800                            : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
3801     S32 i4_init_offset_projected =
3802         (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0;
3803     S32 i4_init_offset_spatial =
3804         (u1_pred_dir_ctr == 1)
3805             ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
3806             : 0;
3807     U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
3808     U08 u1_max_num_search_cands =
3809         gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
3810     S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
3811     U16 u2_is_offset_available = 0;
3812     U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;
3813 
3814     /* Width and ht of current and prev layers */
3815     wd_c = ps_curr_layer->i4_wd;
3816     ht_c = ps_curr_layer->i4_ht;
3817     wd_p = ps_coarse_layer->i4_wd;
3818     ht_p = ps_coarse_layer->i4_ht;
3819 
3820     blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
3821     blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];
3822 
3823     /* ASSERT for valid sizes */
3824     ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
3825 
3826     {
3827         S32 x = i4_pos_x >> 4;
3828         S32 y = i4_pos_y >> 4;
3829 
3830         if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
3831         {
3832             x *= 2;
3833             y *= 2;
3834 
3835             u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
3836         }
3837 
3838         i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
3839                                   ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3840     }
3841 
3842     for(i = 0; i < i4_num_srch_cands; i++)
3843     {
3844         SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
3845             gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
3846                                                                      [i];
3847         SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
3848             gae_search_cand_type_to_location_map[e_search_cand_type];
3849         S08 i1_result_id = MIN(
3850             gai1_search_cand_type_to_result_id_map[e_search_cand_type],
3851             (e_search_cand_loc < 0 ? 0
3852                                    : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
3853         U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3854         U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
3855         U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
3856                                 (ZERO_MV_ALTREF == e_search_cand_type);
3857 
3858         /* When spatial candidates are available, use them, else use the projected candidates */
3859         /* This is required since some blocks will never have certain spatial candidates, and in order */
3860         /* to accomodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list,  */
3861         /* all candidates apart from the 'LEFT' have been marked as projected */
3862         if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
3863             (e_search_cand_loc == TOPRIGHT)) &&
3864            (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
3865         {
3866             if(e_search_cand_loc == TOPLEFT)
3867             {
3868                 u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
3869                                      !ps_ctxt->u1_is_left_available;
3870             }
3871             else if(e_search_cand_loc == TOPRIGHT)
3872             {
3873                 u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
3874             }
3875             else
3876             {
3877                 u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
3878             }
3879 
3880             u1_is_proj_cand = !u1_is_spatial_cand;
3881         }
3882 
3883         switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2))
3884         {
3885         case 1:
3886         {
3887             hme_search_node_populator(
3888                 ps_search_cands[i].ps_search_node,
3889                 &s_zero_mv,
3890                 (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
3891                                                 : ps_ctxt->i1_alt_default_ref_id,
3892                 0);
3893 
3894             break;
3895         }
3896         case 2:
3897         {
3898             S08 i1_mv_magnitude_shift = 0;
3899 
3900             S32 i4_offset = i4_init_offset_spatial;
3901 
3902             i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
3903             i4_offset += i1_result_id;
3904 
3905             switch(e_search_cand_loc)
3906             {
3907             case LEFT:
3908             {
3909                 if(ps_ctxt->u1_is_left_available)
3910                 {
3911                     i1_mv_magnitude_shift = -2;
3912 
3913                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3914 
3915                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3916                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3917                 }
3918                 else
3919                 {
3920                     i1_mv_magnitude_shift = 0;
3921 
3922                     ps_mv = &s_zero_mv;
3923                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3924                 }
3925 
3926                 break;
3927             }
3928             case TOPLEFT:
3929             {
3930                 if(ps_ctxt->u1_is_topLeft_available)
3931                 {
3932                     i1_mv_magnitude_shift = -2;
3933 
3934                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk;
3935                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3936 
3937                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3938                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3939                 }
3940                 else
3941                 {
3942                     i1_mv_magnitude_shift = 0;
3943 
3944                     ps_mv = &s_zero_mv;
3945                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3946                 }
3947 
3948                 break;
3949             }
3950             case TOP:
3951             {
3952                 if(ps_ctxt->u1_is_top_available)
3953                 {
3954                     i1_mv_magnitude_shift = -2;
3955 
3956                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3957 
3958                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3959                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3960                 }
3961                 else
3962                 {
3963                     i1_mv_magnitude_shift = 0;
3964 
3965                     ps_mv = &s_zero_mv;
3966                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3967                 }
3968 
3969                 break;
3970             }
3971             case TOPRIGHT:
3972             {
3973                 if(ps_ctxt->u1_is_topRight_available)
3974                 {
3975                     i1_mv_magnitude_shift = -2;
3976 
3977                     i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
3978                                  u1_search_blk_to_spatial_mvbank_blk_size_factor;
3979                     i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row;
3980 
3981                     ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
3982                     pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
3983                 }
3984                 else
3985                 {
3986                     i1_mv_magnitude_shift = 0;
3987                     ps_mv = &s_zero_mv;
3988                     pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
3989                 }
3990 
3991                 break;
3992             }
3993             default:
3994             {
3995                 /* AiyAiyYo!! */
3996                 ASSERT(0);
3997             }
3998             }
3999 
4000             hme_search_node_populator(
4001                 ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);
4002 
4003             break;
4004         }
4005         case 4:
4006         {
4007             ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
4008             ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);
4009 
4010             i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);
4011 
4012             if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
4013             {
4014                 S32 x, y;
4015 
4016                 x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
4017                 y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];
4018 
4019                 /* Safety check to avoid uninitialized access across temporal layers */
4020                 x = CLIP3(x, 0, (wd_c - blksize_p));
4021                 y = CLIP3(y, 0, (ht_c - blksize_p));
4022 
4023                 /* Project the positions to prev layer */
4024                 x = x >> blksize_p;
4025                 y = y >> blksize_p;
4026 
4027                 ai4_cand_offsets[e_search_cand_loc] =
4028                     (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
4029                 ai4_cand_offsets[e_search_cand_loc] +=
4030                     (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
4031                 ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;
4032 
4033                 u2_is_offset_available |= (1 << e_search_cand_loc);
4034             }
4035 
4036             ps_mv =
4037                 ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4038             pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
4039                           ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
4040 
4041             hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);
4042 
4043             break;
4044         }
4045         default:
4046         {
4047             /* NoNoNoNoNooooooooNO! */
4048             ASSERT(0);
4049         }
4050         }
4051 
4052         ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
4053         ASSERT(
4054             !u1_pred_dir
4055                 ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4056                    i4_num_act_ref_l0)
4057                 : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
4058                    ps_ctxt->i4_num_act_ref_l1));
4059     }
4060 
4061     return i4_num_srch_cands;
4062 }
4063 
/**
********************************************************************************
*  @fn     void hme_mv_clipper(...)
*
*  @brief  Clips the MV of every search candidate to the MV range of the
*          reference picture that the candidate points to.
*
*  @param[in,out]  ps_search_prms_blk : supplies the candidate list
*                  (ps_search_candts) and the per-reference MV ranges
*                  (aps_mv_range); the candidates' MVs are modified in place
*  @param[in]  i4_num_srch_cands : number of candidates to process
*  @param[in]  i1_check_for_mult_refs : not used by this function
*  @param[in]  u1_fpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*  @param[in]  u1_hpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*  @param[in]  u1_qpel_refine_extent : forwarded to CLIP_MV_WITHIN_RANGE
*
*  @return None
********************************************************************************
*/
void hme_mv_clipper(
    hme_search_prms_t *ps_search_prms_blk,
    S32 i4_num_srch_cands,
    S08 i1_check_for_mult_refs,
    U08 u1_fpel_refine_extent,
    U08 u1_hpel_refine_extent,
    U08 u1_qpel_refine_extent)
{
    S32 i4_cand_idx = 0;

    while(i4_cand_idx < i4_num_srch_cands)
    {
        search_node_t *ps_node = ps_search_prms_blk->ps_search_candts[i4_cand_idx].ps_search_node;

        /* The permitted range is selected by the candidate's own ref idx */
        range_prms_t *ps_ref_range = ps_search_prms_blk->aps_mv_range[ps_node->i1_ref_idx];

        /* Clipping is done at this stage because two candidates can become */
        /* identical once clipped; such duplicates get removed later during */
        /* deduplication */
        CLIP_MV_WITHIN_RANGE(
            ps_node->ps_mv->i2_mvx,
            ps_node->ps_mv->i2_mvy,
            ps_ref_range,
            u1_fpel_refine_extent,
            u1_hpel_refine_extent,
            u1_qpel_refine_extent);

        i4_cand_idx++;
    }
}
4093 
/**
********************************************************************************
*  @fn     void hme_init_pred_buf_info(...)
*
*  @brief  Acquires one free prediction buffer from the buffer manager and
*          records it in ps_info[0][0] (first PU) and, for partition types
*          other than PRT_2Nx2N, in ps_info[0][1] (second PU). Both PUs share
*          the same buffer with a stride of MAX_CU_SIZE.
*
*  @param[out] ps_info : pred buffer descriptors; only row 0 is written
*  @param[in,out]  ps_buf_mngr : buffer manager; its usage indicator is passed
*                  to ihevce_get_free_pred_buf_indices
*  @param[in]  u1_pu1_wd : width of the first PU
*  @param[in]  u1_pu1_ht : height of the first PU
*  @param[in]  e_part_type : partition type of the block
*
*  @return None. If a buffer cannot be acquired, ASSERT(0) is raised and
*          ps_info is left untouched.
********************************************************************************
*/
void hme_init_pred_buf_info(
    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_buf_mngr,
    U08 u1_pu1_wd,
    U08 u1_pu1_ht,
    PART_TYPE_T e_part_type)
{
    U08 u1_buf_id;

    hme_pred_buf_info_t *ps_pu1_info = &ps_info[0][0];
    hme_pred_buf_info_t *ps_pu2_info = &ps_info[0][1];

    /* Exactly one buffer is requested; anything else is a failure */
    if(1 !=
       ihevce_get_free_pred_buf_indices(&u1_buf_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
    {
        ASSERT(0);
        return;
    }

    /* First PU sits at the start of the buffer */
    ps_pu1_info->i4_pred_stride = MAX_CU_SIZE;
    ps_pu1_info->pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id];
    ps_pu1_info->u1_pred_buf_array_id = u1_buf_id;

    /* A 2Nx2N block has no second PU */
    if(PRT_2Nx2N == e_part_type)
    {
        return;
    }

    ps_pu2_info->i4_pred_stride = MAX_CU_SIZE;

    if(gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]])
    {
        /* Second PU starts u1_pu1_ht rows below the first PU */
        ps_pu2_info->pu1_pred =
            ps_buf_mngr->apu1_pred_bufs[u1_buf_id] + u1_pu1_ht * ps_pu2_info->i4_pred_stride;
    }
    else
    {
        /* Second PU starts u1_pu1_wd columns to the right of the first PU */
        ps_pu2_info->pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id] + u1_pu1_wd;
    }

    ps_pu2_info->u1_pred_buf_array_id = u1_buf_id;
}
4125 
/**
********************************************************************************
*  @fn     void hme_debrief_bipred_eval(...)
*
*  @brief  Called once the prediction mode of every PU of a partition type is
*          known. Selects the buffer that holds the final prediction of the
*          whole partition type, copies into it the prediction of any PU that
*          currently lives in a different buffer, and releases the allocated
*          buffers that are no longer needed.
*
*          In the selected buffer the first PU starts at pu1_pred and the
*          second PU starts at an offset of (PU1 height * stride) for
*          partitions flagged in gai1_is_part_vertical, or (PU1 width)
*          otherwise.
*
*  @param[in,out]  ps_part_type_result : partition type result; its PU results
*                  are read, pu1_pred and i4_pred_stride are written
*  @param[in]  ps_pred_buf_info : pred buffer descriptors indexed as
*              [row][pu]. Row 0 is read for PUs whose mode is not PRED_BI and
*              row 2 for PUs whose mode is PRED_BI. A u1_pred_buf_array_id of
*              UCHAR_MAX presumably means the prediction is not held in one of
*              the manager's buffers - TODO confirm against the caller
*  @param[in,out]  ps_pred_buf_mngr : buffer manager owning the buffers and
*                  the usage indicator
*  @param[in]  pu1_allocated_pred_buf_array_indixes : three buffer indices
*              (entries 0, 1 and 2) that were allocated for this evaluation
*  @param[in]  ps_cmn_utils_optimised_function_list : provides pf_copy_2d
*
*  @return None
********************************************************************************
*/
void hme_debrief_bipred_eval(
    part_type_results_t *ps_part_type_result,
    hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
    hme_pred_buf_mngr_t *ps_pred_buf_mngr,
    U08 *pu1_allocated_pred_buf_array_indixes,
    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
{
    PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;

    U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
    U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];

    if(0 == ps_part_type_result->u1_part_type)
    {
        /* Single PU: just point at the winning buffer, no copy is needed */
        if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
        {
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
        }
        else
        {
            ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
            }
        }
    }
    else
    {
        U08 *pu1_src_pred;
        U08 *pu1_dst_pred;
        S32 i4_src_pred_stride;
        S32 i4_dst_pred_stride;

        /* PU dimensions in pixels, derived from the 4-pixel unit fields */
        U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
        U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
        U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
        U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;

        /* Bit 0 set => PU1 is bi-pred; bit 1 set => PU2 is bi-pred */
        U08 u1_condition_for_switch =
            (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
            ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);

        switch(u1_condition_for_switch)
        {
        case 0:
        {
            /* Neither PU is bi-pred: the final pred goes into buffer [0] */
            ps_part_type_result->pu1_pred =
                ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
            ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
            {
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 1:
        {
            /* Only PU1 is bi-pred */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU1 pred into PU2's pred buf when PU1 is the smaller PU */
            /* and PU2's pred is held in a managed buffer */
            if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
            {
                /* Step back from PU2's start to the start of its buffer */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[0][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }
            else
            {
                /* Otherwise keep PU1's bi-pred buffer and copy PU2 into it */
                ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                /* PU2 must land at its own offset inside the buffer; writing */
                /* it at the buffer start would overwrite PU1's bi-pred result */
                pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }

            break;
        }
        case 2:
        {
            /* Only PU2 is bi-pred */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);

            /* Copy PU2 pred into PU1's pred buf when PU2 is the smaller PU */
            /* and PU1's pred is held in a managed buffer */
            if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
               (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
            {
                ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);

                pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred +
                               (u1_is_part_vertical
                                    ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
                                    : u1_pu1_wd);
                i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu2_wd,
                    u1_pu2_ht);
            }
            else
            {
                /* Otherwise keep PU2's bi-pred buffer; step back from PU2's */
                /* start to the start of that buffer and copy PU1 there */
                ps_part_type_result->pu1_pred =
                    ps_pred_buf_info[2][1].pu1_pred -
                    (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
                                         : u1_pu1_wd);
                ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;

                ihevce_set_pred_buf_as_free(
                    pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

                pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
                pu1_dst_pred = ps_part_type_result->pu1_pred;
                i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
                i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;

                ps_cmn_utils_optimised_function_list->pf_copy_2d(
                    pu1_dst_pred,
                    i4_dst_pred_stride,
                    pu1_src_pred,
                    i4_src_pred_stride,
                    u1_pu1_wd,
                    u1_pu1_ht);
            }

            break;
        }
        case 3:
        {
            /* Both PUs are bi-pred and must already share one buffer */
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
            ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
            ASSERT(
                ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
                ps_pred_buf_info[2][0].u1_pred_buf_array_id);

            ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
            ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;

            ihevce_set_pred_buf_as_free(
                pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);

            break;
        }
        }
    }
}
4376 
hme_decide_search_candidate_priority_in_l1_and_l2_me(SEARCH_CANDIDATE_TYPE_T e_cand_type,ME_QUALITY_PRESETS_T e_quality_preset)4377 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
4378     SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
4379 {
4380     U08 u1_priority_val =
4381         gau1_search_cand_priority_in_l1_and_l2_me[e_quality_preset >= ME_MEDIUM_SPEED][e_cand_type];
4382 
4383     if(UCHAR_MAX == u1_priority_val)
4384     {
4385         ASSERT(0);
4386     }
4387 
4388     ASSERT(u1_priority_val <= MAX_INIT_CANDTS);
4389 
4390     return u1_priority_val;
4391 }
4392 
/**
********************************************************************************
*  @fn     U08 hme_decide_search_candidate_priority_in_l0_me(...)
*
*  @brief  Looks up the priority of a search candidate type in
*          gau1_search_cand_priority_in_l0_me.
*
*  @param[in]  e_cand_type : candidate type; selects the table column
*  @param[in]  u1_index : search candidate list index; selects the table row
*
*  @return Priority read from the table. A table value of UCHAR_MAX, or one
*          above MAX_INIT_CANDTS, trips an ASSERT.
********************************************************************************
*/
U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
{
    const U08 u1_priority = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];

    /* UCHAR_MAX presumably marks a candidate type that has no priority in */
    /* the selected list - TODO confirm against the table definition */
    if(u1_priority == UCHAR_MAX)
    {
        ASSERT(0);
    }

    ASSERT(u1_priority <= MAX_INIT_CANDTS);

    return u1_priority;
}
4406 
hme_search_cand_data_init(S32 * pi4_id_Z,S32 * pi4_id_coloc,S32 * pi4_num_coloc_cands,U08 * pu1_search_candidate_list_index,S32 i4_num_act_ref_l0,S32 i4_num_act_ref_l1,U08 u1_is_bidir_enabled,U08 u1_4x4_blk_in_l1me)4407 void hme_search_cand_data_init(
4408     S32 *pi4_id_Z,
4409     S32 *pi4_id_coloc,
4410     S32 *pi4_num_coloc_cands,
4411     U08 *pu1_search_candidate_list_index,
4412     S32 i4_num_act_ref_l0,
4413     S32 i4_num_act_ref_l1,
4414     U08 u1_is_bidir_enabled,
4415     U08 u1_4x4_blk_in_l1me)
4416 {
4417     S32 i, j;
4418     S32 i4_num_coloc_cands;
4419 
4420     U08 u1_search_candidate_list_index;
4421 
4422     if(!u1_is_bidir_enabled && !u1_4x4_blk_in_l1me)
4423     {
4424         S32 i;
4425 
4426         u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2;
4427         i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4428 
4429         switch(i4_num_act_ref_l0)
4430         {
4431         case 1:
4432         {
4433             for(i = 0; i < 2; i++)
4434             {
4435                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4436                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4437                     u1_search_candidate_list_index);
4438             }
4439 
4440             break;
4441         }
4442         case 2:
4443         {
4444             for(i = 0; i < 4; i++)
4445             {
4446                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4447                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4448                     u1_search_candidate_list_index);
4449             }
4450 
4451             break;
4452         }
4453         case 3:
4454         {
4455             for(i = 0; i < 6; i++)
4456             {
4457                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4458                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4459                     u1_search_candidate_list_index);
4460             }
4461 
4462             break;
4463         }
4464         case 4:
4465         {
4466             for(i = 0; i < 8; i++)
4467             {
4468                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4469                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4470                     u1_search_candidate_list_index);
4471             }
4472 
4473             break;
4474         }
4475         default:
4476         {
4477             ASSERT(0);
4478         }
4479         }
4480 
4481         *pi4_num_coloc_cands = i4_num_coloc_cands;
4482         *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4483     }
4484     else if(!u1_is_bidir_enabled && u1_4x4_blk_in_l1me)
4485     {
4486         S32 i;
4487 
4488         i4_num_coloc_cands = i4_num_act_ref_l0 * 2;
4489         u1_search_candidate_list_index = (i4_num_act_ref_l0 - 1) * 2 + 1;
4490 
4491         switch(i4_num_act_ref_l0)
4492         {
4493         case 1:
4494         {
4495             for(i = 0; i < 2; i++)
4496             {
4497                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4498                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4499                     u1_search_candidate_list_index);
4500             }
4501 
4502             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4503                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4504 
4505             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4506                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4507 
4508             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4509                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4510 
4511             i4_num_coloc_cands += 3;
4512 
4513             break;
4514         }
4515         case 2:
4516         {
4517             for(i = 0; i < 4; i++)
4518             {
4519                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4520                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4521                     u1_search_candidate_list_index);
4522             }
4523 
4524             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4525                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4526 
4527             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4528                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4529 
4530             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4531                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4532 
4533             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4534                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4535 
4536             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4537                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4538 
4539             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4540                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4541 
4542             i4_num_coloc_cands += 6;
4543 
4544             break;
4545         }
4546         case 3:
4547         {
4548             for(i = 0; i < 6; i++)
4549             {
4550                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4551                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4552                     u1_search_candidate_list_index);
4553             }
4554 
4555             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4556                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4557 
4558             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4559                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4560 
4561             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4562                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4563 
4564             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4565                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4566 
4567             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4568                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4569 
4570             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4571                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4572 
4573             i4_num_coloc_cands += 6;
4574 
4575             break;
4576         }
4577         case 4:
4578         {
4579             for(i = 0; i < 8; i++)
4580             {
4581                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4582                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4583                     u1_search_candidate_list_index);
4584             }
4585 
4586             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4587                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4588 
4589             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4590                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4591 
4592             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4593                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4594 
4595             pi4_id_coloc[i + 3] = hme_decide_search_candidate_priority_in_l0_me(
4596                 PROJECTED_COLOC_TR1, u1_search_candidate_list_index);
4597 
4598             pi4_id_coloc[i + 4] = hme_decide_search_candidate_priority_in_l0_me(
4599                 PROJECTED_COLOC_BL1, u1_search_candidate_list_index);
4600 
4601             pi4_id_coloc[i + 5] = hme_decide_search_candidate_priority_in_l0_me(
4602                 PROJECTED_COLOC_BR1, u1_search_candidate_list_index);
4603 
4604             i4_num_coloc_cands += 6;
4605 
4606             break;
4607         }
4608         default:
4609         {
4610             ASSERT(0);
4611         }
4612         }
4613 
4614         *pi4_num_coloc_cands = i4_num_coloc_cands;
4615         *pu1_search_candidate_list_index = u1_search_candidate_list_index;
4616     }
4617     else
4618     {
4619         /* The variable 'u1_search_candidate_list_index' is hardcoded */
4620         /* to 10 and 11 respectively. But, these values are not returned */
4621         /* by this function since the actual values are dependent on */
4622         /* the number of refs in L0 and L1 respectively */
4623         /* Hence, the actual return values are being recomputed */
4624         /* in the latter part of this block */
4625 
4626         if(!u1_4x4_blk_in_l1me)
4627         {
4628             u1_search_candidate_list_index = 10;
4629 
4630             i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4631 
4632             for(i = 0; i < i4_num_coloc_cands; i++)
4633             {
4634                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4635                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4636                     u1_search_candidate_list_index);
4637             }
4638         }
4639         else
4640         {
4641             u1_search_candidate_list_index = 11;
4642 
4643             i4_num_coloc_cands = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
4644 
4645             for(i = 0; i < i4_num_coloc_cands; i++)
4646             {
4647                 pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4648                     (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i),
4649                     u1_search_candidate_list_index);
4650             }
4651 
4652             pi4_id_coloc[i] = hme_decide_search_candidate_priority_in_l0_me(
4653                 PROJECTED_COLOC_TR0, u1_search_candidate_list_index);
4654 
4655             pi4_id_coloc[i + 1] = hme_decide_search_candidate_priority_in_l0_me(
4656                 PROJECTED_COLOC_BL0, u1_search_candidate_list_index);
4657 
4658             pi4_id_coloc[i + 2] = hme_decide_search_candidate_priority_in_l0_me(
4659                 PROJECTED_COLOC_BR0, u1_search_candidate_list_index);
4660         }
4661 
4662         for(j = 0; j < 2; j++)
4663         {
4664             if(0 == j)
4665             {
4666                 pu1_search_candidate_list_index[j] =
4667                     8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
4668                 pi4_num_coloc_cands[j] =
4669                     (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);
4670             }
4671             else
4672             {
4673                 pu1_search_candidate_list_index[j] =
4674                     8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
4675                 pi4_num_coloc_cands[j] =
4676                     (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
4677             }
4678         }
4679     }
4680 
4681     if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
4682     {
4683         pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
4684             (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
4685     }
4686 
4687     if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
4688     {
4689         pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
4690             (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
4691     }
4692 }
4693 
4694 static U08
hme_determine_base_block_size(S32 * pi4_valid_part_array,S32 i4_num_valid_parts,U08 u1_cu_size)4695     hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
4696 {
4697     ASSERT(i4_num_valid_parts > 0);
4698 
4699     if(1 == i4_num_valid_parts)
4700     {
4701         ASSERT(pi4_valid_part_array[i4_num_valid_parts - 1] == PART_ID_2Nx2N);
4702 
4703         return u1_cu_size;
4704     }
4705     else
4706     {
4707         if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_NxN_BR)
4708         {
4709             return u1_cu_size / 2;
4710         }
4711         else if(pi4_valid_part_array[i4_num_valid_parts - 1] <= PART_ID_nRx2N_R)
4712         {
4713             return u1_cu_size / 4;
4714         }
4715     }
4716 
4717     return u1_cu_size / 4;
4718 }
4719 
hme_compute_variance_of_pu_from_base_blocks(ULWORD64 * pu8_SigmaX,ULWORD64 * pu8_SigmaXSquared,U08 u1_cu_size,U08 u1_base_block_size,S32 i4_part_id)4720 static U32 hme_compute_variance_of_pu_from_base_blocks(
4721     ULWORD64 *pu8_SigmaX,
4722     ULWORD64 *pu8_SigmaXSquared,
4723     U08 u1_cu_size,
4724     U08 u1_base_block_size,
4725     S32 i4_part_id)
4726 {
4727     U08 i, j;
4728     ULWORD64 u8_final_variance;
4729 
4730     U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
4731     S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
4732     S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
4733     U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
4734     U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
4735     U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
4736     U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
4737     U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
4738     ULWORD64 u8_final_SigmaXSquared = 0;
4739     ULWORD64 u8_final_SigmaX = 0;
4740 
4741     if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
4742     {
4743         U08 u1_column_start_index = gau1_part_id_to_part_num[i4_part_id]
4744                                         ? (gai1_is_part_vertical[i4_part_id]
4745                                                ? 0
4746                                                : (u1_cu_size - i4_part_wd) / u1_base_block_size)
4747                                         : 0;
4748         U08 u1_row_start_index = gau1_part_id_to_part_num[i4_part_id]
4749                                      ? (gai1_is_part_vertical[i4_part_id]
4750                                             ? (u1_cu_size - i4_part_ht) / u1_base_block_size
4751                                             : 0)
4752                                      : 0;
4753         U08 u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4754         U08 u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4755 
4756         for(i = u1_row_start_index; i < u1_row_end_index; i++)
4757         {
4758             for(j = u1_column_start_index; j < u1_column_end_index; j++)
4759             {
4760                 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4761                 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4762             }
4763         }
4764 
4765         u8_final_variance =
4766             u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4767         u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4768         u8_final_variance +=
4769             ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4770              (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4771         u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4772                              (u1_num_base_blocks * u4_num_pixels_in_base_block);
4773 
4774         ASSERT(u8_final_variance <= UINT_MAX);
4775     }
4776     else
4777     {
4778         U08 u1_row_start_index;
4779         U08 u1_column_start_index;
4780         U08 u1_row_end_index;
4781         U08 u1_column_end_index;
4782 
4783         switch(gau1_part_id_to_part_num[i4_part_id])
4784         {
4785         case 0:
4786         {
4787             u1_row_start_index = 0;
4788             u1_column_start_index = 0;
4789 
4790             break;
4791         }
4792         case 1:
4793         {
4794             u1_row_start_index = 0;
4795             u1_column_start_index = u1_num_base_blocks_in_pu_row;
4796 
4797             break;
4798         }
4799         case 2:
4800         {
4801             u1_row_start_index = u1_num_base_blocks_in_pu_column;
4802             u1_column_start_index = 0;
4803 
4804             break;
4805         }
4806         case 3:
4807         {
4808             u1_row_start_index = u1_num_base_blocks_in_pu_column;
4809             u1_column_start_index = u1_num_base_blocks_in_pu_row;
4810 
4811             break;
4812         }
4813         }
4814 
4815         u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
4816         u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
4817 
4818         for(i = u1_row_start_index; i < u1_row_end_index; i++)
4819         {
4820             for(j = u1_column_start_index; j < u1_column_end_index; j++)
4821             {
4822                 u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
4823                 u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
4824             }
4825         }
4826 
4827         u8_final_variance =
4828             u1_num_base_blocks * u4_num_pixels_in_base_block * u8_final_SigmaXSquared;
4829         u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
4830         u8_final_variance +=
4831             ((u1_num_base_blocks * u4_num_pixels_in_base_block) *
4832              (u1_num_base_blocks * u4_num_pixels_in_base_block) / 2);
4833         u8_final_variance /= (u1_num_base_blocks * u4_num_pixels_in_base_block) *
4834                              (u1_num_base_blocks * u4_num_pixels_in_base_block);
4835 
4836         ASSERT(u8_final_variance <= UINT_MAX);
4837     }
4838 
4839     return u8_final_variance;
4840 }
4841 
/**
 * Computes the variance of every requested partition of a CU.
 *
 * The CU is first cut into square base blocks, whose size is chosen by
 * hme_determine_base_block_size(), and the sum of pixels and of squared
 * pixels is gathered for each of them. Each partition's variance is then
 * derived from those sums by hme_compute_variance_of_pu_from_base_blocks().
 *
 * @param pu1_data              Top-left pixel of the CU
 * @param i4_data_stride        Stride of pu1_data
 * @param pi4_valid_part_array  Partition ids to evaluate
 * @param pu4_variance          Output; written at the index of each partition
 *                              id found in pi4_valid_part_array
 * @param i4_num_valid_parts    Number of entries in pi4_valid_part_array
 * @param u1_cu_size            CU size
 */
void hme_compute_variance_for_all_parts(
    U08 *pu1_data,
    S32 i4_data_stride,
    S32 *pi4_valid_part_array,
    U32 *pu4_variance,
    S32 i4_num_valid_parts,
    U08 u1_cu_size)
{
    /* At most 4x4 base blocks per CU (asserted below) */
    ULWORD64 au8_blk_sum[16];
    ULWORD64 au8_blk_sum_sq[16];
    U08 u1_blk_y, u1_blk_x, u1_pix_y, u1_pix_x, u1_part_iter;

    U08 u1_base_block_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
    U08 u1_blks_per_cu_side = u1_cu_size / u1_base_block_size;

    ASSERT(u1_blks_per_cu_side <= 4);

    /* Pass 1: statistics of every base block, stored row-major */
    for(u1_blk_y = 0; u1_blk_y < u1_blks_per_cu_side; u1_blk_y++)
    {
        for(u1_blk_x = 0; u1_blk_x < u1_blks_per_cu_side; u1_blk_x++)
        {
            U08 *pu1_blk = pu1_data + (u1_base_block_size * u1_blk_x) +
                           (u1_base_block_size * u1_blk_y * i4_data_stride);
            ULWORD64 u8_sum = 0;
            ULWORD64 u8_sum_sq = 0;

            for(u1_pix_y = 0; u1_pix_y < u1_base_block_size; u1_pix_y++)
            {
                for(u1_pix_x = 0; u1_pix_x < u1_base_block_size; u1_pix_x++)
                {
                    U08 u1_pixel = pu1_blk[u1_pix_x + u1_pix_y * i4_data_stride];

                    u8_sum += u1_pixel;
                    u8_sum_sq += u1_pixel * u1_pixel;
                }
            }

            au8_blk_sum[u1_blk_x + u1_blk_y * u1_blks_per_cu_side] = u8_sum;
            au8_blk_sum_sq[u1_blk_x + u1_blk_y * u1_blks_per_cu_side] = u8_sum_sq;
        }
    }

    /* Pass 2: combine the base blocks into one variance per partition */
    for(u1_part_iter = 0; u1_part_iter < i4_num_valid_parts; u1_part_iter++)
    {
        S32 i4_part_id = pi4_valid_part_array[u1_part_iter];

        pu4_variance[i4_part_id] = hme_compute_variance_of_pu_from_base_blocks(
            au8_blk_sum, au8_blk_sum_sq, u1_cu_size, u1_base_block_size, i4_part_id);
    }
}
4894 
/**
 * Combines per-base-block sums into the sums of one partition of a CU.
 *
 * The partition dimensions come from gai1_part_wd_and_ht, scaled by
 * (u1_cu_size >> 4). The sum of pixels and the sum of squared pixels of all
 * base blocks covered by the partition are added up, and the squared sum is
 * finally multiplied by N, the number of pixels in the partition.
 *
 * @param pu4_SigmaX                Per-base-block sum of pixels, stored
 *                                  row-major with u1_base_blk_array_stride
 *                                  entries per row
 * @param pu4_SigmaXSquared         Per-base-block sum of squared pixels, same
 *                                  layout
 * @param pu8_final_sigmaX          Output; entry [i4_part_id] receives the sum
 *                                  of pixels of the partition
 * @param pu8_final_sigmaX_Squared  Output; entry [i4_part_id] receives
 *                                  N * (sum of squared pixels of the partition)
 * @param u1_cu_size                CU size
 * @param u1_base_block_size        Edge length of one base block (must be
 *                                  non-zero)
 * @param i4_part_id                Partition id; indexes the global partition
 *                                  tables and both output arrays
 * @param u1_base_blk_array_stride  Row stride, in entries, of the two input
 *                                  arrays
 */
void hme_compute_final_sigma_of_pu_from_base_blocks(
    U32 *pu4_SigmaX,
    U32 *pu4_SigmaXSquared,
    ULWORD64 *pu8_final_sigmaX,
    ULWORD64 *pu8_final_sigmaX_Squared,
    U08 u1_cu_size,
    U08 u1_base_block_size,
    S32 i4_part_id,
    U08 u1_base_blk_array_stride)
{
    U08 i, j;

    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
    U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
    U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);
    ULWORD64 u8_sigmaX = 0;
    ULWORD64 u8_sigmaX_Squared = 0;

    /* Default to the top-left corner so that no path reads an unset index */
    U08 u1_row_start_index = 0;
    U08 u1_column_start_index = 0;
    U08 u1_row_end_index;
    U08 u1_column_end_index;

    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
    {
        /* Part 0 starts at the CU origin; any later part is pushed to the */
        /* far edge of the CU along the direction in which the CU is split */
        if(gau1_part_id_to_part_num[i4_part_id])
        {
            if(gai1_is_part_vertical[i4_part_id])
            {
                u1_row_start_index = (u1_cu_size - i4_part_ht) / u1_base_block_size;
            }
            else
            {
                u1_column_start_index = (u1_cu_size - i4_part_wd) / u1_base_block_size;
            }
        }
    }
    else
    {
        /* NxN: the part number selects one of the four quadrants */
        switch(gau1_part_id_to_part_num[i4_part_id])
        {
        case 0:
        {
            break;
        }
        case 1:
        {
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        case 2:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;

            break;
        }
        case 3:
        {
            u1_row_start_index = u1_num_base_blocks_in_pu_column;
            u1_column_start_index = u1_num_base_blocks_in_pu_row;

            break;
        }
        default:
        {
            /* Not expected for an NxN partition; the indices stay at the */
            /* CU origin rather than being left indeterminate */
            break;
        }
        }
    }

    u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
    u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;

    /* Gather the statistics of every base block covered by the partition */
    for(i = u1_row_start_index; i < u1_row_end_index; i++)
    {
        for(j = u1_column_start_index; j < u1_column_end_index; j++)
        {
            u8_sigmaX_Squared += pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
            u8_sigmaX += pu4_SigmaX[j + i * u1_base_blk_array_stride];
        }
    }

    pu8_final_sigmaX[i4_part_id] = u8_sigmaX;

    /* Pre-scaled by N so that callers can form N * sum(x^2) - sum(x)^2 */
    pu8_final_sigmaX_Squared[i4_part_id] = u8_sigmaX_Squared * u4_N;
}
5009 
/**
 * Scales the SAD of every requested partition by a factor that depends on how
 * closely the variance of the prediction matches the variance of the source.
 *
 * For each partition id in pi4_valid_part_array:
 *   - when i4_alpha_stim_multiplier is non-zero, a noise term is computed as
 *         alpha * 2 * round((src_var * ref_var << STIM_Q_FORMAT) /
 *                           (src_var^2 + ref_var^2))
 *     (or alpha << STIM_Q_FORMAT when both variances are equal); otherwise
 *     the noise term is 0
 *   - the SAD is replaced by
 *         round(SAD * ((1 << Q) - noise_term) / (1 << Q))
 *     with Q = STIM_Q_FORMAT + ALPHA_Q_FORMAT
 *
 * @param pu1_pred                  Prediction block
 * @param i4_pred_stride            Stride of pu1_pred
 * @param pi4_valid_part_array      Partition ids to process
 * @param pu8_src_sigmaX            Source sums, forwarded to
 *                                  ihevce_calc_stim_injected_variance()
 * @param pu8_src_sigmaXSquared     Source squared sums, forwarded likewise
 * @param pi4_sad_array             In/out; SAD per partition id, updated in
 *                                  place
 * @param i4_alpha_stim_multiplier  Weight of the noise term; 0 disables it
 * @param i4_inv_wt                 Forwarded to
 *                                  ihevce_calc_stim_injected_variance()
 * @param i4_inv_wt_shift_val       Forwarded likewise
 * @param i4_num_valid_parts        Number of entries in pi4_valid_part_array
 * @param i4_wpred_log_wdc          Forwarded likewise
 * @param u1_cu_size                CU size (asserted >= 16)
 */
void hme_compute_stim_injected_distortion_for_all_parts(
    U08 *pu1_pred,
    S32 i4_pred_stride,
    S32 *pi4_valid_part_array,
    ULWORD64 *pu8_src_sigmaX,
    ULWORD64 *pu8_src_sigmaXSquared,
    S32 *pi4_sad_array,
    S32 i4_alpha_stim_multiplier,
    S32 i4_inv_wt,
    S32 i4_inv_wt_shift_val,
    S32 i4_num_valid_parts,
    S32 i4_wpred_log_wdc,
    U08 u1_cu_size)
{
    /* Per-base-block statistics of the prediction */
    U32 au4_pred_sum[16], au4_pred_sum_sq[16];
    /* Per-partition statistics of the prediction, indexed by partition id */
    ULWORD64 au8_part_sum[17], au8_part_sum_sq[17];

    ULWORD64 u8_similarity, u8_denominator, u8_scaled_dist;
    ULWORD64 u8_sum_squared, u8_src_var, u8_ref_var;

    U16 u2_part_iter;
    U08 u1_base_block_size;
    U08 u1_blks_per_cu_side;

    WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;

    u1_base_block_size =
        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);

    ASSERT(u1_cu_size >= 16);

    u1_blks_per_cu_side = u1_cu_size / u1_base_block_size;

    /* 32-bit sums of every base block of the prediction */
    hme_compute_sigmaX_and_sigmaXSquared(
        pu1_pred,
        i4_pred_stride,
        au4_pred_sum,
        au4_pred_sum_sq,
        u1_base_block_size,
        u1_base_block_size,
        u1_cu_size,
        u1_cu_size,
        1,
        u1_blks_per_cu_side);

    for(u2_part_iter = 0; u2_part_iter < i4_num_valid_parts; u2_part_iter++)
    {
        S32 i4_part_id = pi4_valid_part_array[u2_part_iter];
        S32 i4_noise_term = 0;

        if(i4_alpha_stim_multiplier)
        {
            unsigned long u4_shift_val;
            S32 i4_bits_req;

            /* Sums of the prediction over this partition */
            hme_compute_final_sigma_of_pu_from_base_blocks(
                au4_pred_sum,
                au4_pred_sum_sq,
                au8_part_sum,
                au8_part_sum_sq,
                u1_cu_size,
                u1_base_block_size,
                i4_part_id,
                u1_blks_per_cu_side);

            /* N * sum(x^2) - sum(x)^2 */
            u8_sum_squared = (au8_part_sum[i4_part_id] * au8_part_sum[i4_part_id]);
            u8_ref_var = (au8_part_sum_sq[i4_part_id] - u8_sum_squared);

            u4_shift_val = ihevce_calc_stim_injected_variance(
                pu8_src_sigmaX,
                pu8_src_sigmaXSquared,
                &u8_src_var,
                i4_inv_wt,
                i4_inv_wt_shift_val,
                i4_wpred_log_wdc,
                i4_part_id);

            u8_ref_var = u8_ref_var >> u4_shift_val;

            GETRANGE64(i4_bits_req, u8_ref_var);

            /* Keep the reference variance within 27 bits and scale the */
            /* source variance by the same amount */
            if(i4_bits_req > 27)
            {
                u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
                u8_src_var = u8_src_var >> (i4_bits_req - 27);
            }

            if(u8_src_var != u8_ref_var)
            {
                /* 2 * round(src * ref / (src^2 + ref^2)) in STIM_Q_FORMAT */
                u8_similarity = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
                u8_denominator = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
                u8_similarity = (u8_similarity + (u8_denominator / 2));
                u8_similarity = (u8_similarity / u8_denominator);
                u8_similarity = (2 * u8_similarity);
            }
            else
            {
                u8_similarity = (1 << STIM_Q_FORMAT);
            }

            i4_noise_term = (UWORD32)u8_similarity;

            ASSERT(i4_noise_term >= 0);

            i4_noise_term *= i4_alpha_stim_multiplier;
        }

        /* SAD * (1 - noise_term) in Q(i4_q_level), rounded to nearest */
        u8_scaled_dist = pi4_sad_array[i4_part_id];
        u8_scaled_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
        u8_scaled_dist += (1 << ((i4_q_level)-1));
        pi4_sad_array[i4_part_id] = (UWORD32)(u8_scaled_dist >> (i4_q_level));
    }
}
5125 
/**
 * Computes, for every base block of a block of pixels, the sum of the pixels
 * and the sum of the squared pixels.
 *
 * The block is tiled with (u1_blk_wd / u1_base_blk_wd) by
 * (u1_blk_ht / u1_base_blk_ht) base blocks. Results are stored row-major with
 * u1_array_stride entries per row.
 *
 * @param pu1_data          Top-left pixel of the block
 * @param i4_buf_stride     Stride of pu1_data
 * @param pv_sigmaX         Output array of pixel sums; treated as U32 * or
 *                          ULWORD64 * depending on
 *                          u1_is_sigma_pointer_size_32_bit
 * @param pv_sigmaXSquared  Output array of squared-pixel sums, same element
 *                          type as pv_sigmaX. In the 64-bit case each entry is
 *                          additionally multiplied by u1_blk_wd * u1_blk_ht
 * @param u1_base_blk_wd    Width of one base block (must be non-zero)
 * @param u1_base_blk_ht    Height of one base block (must be non-zero)
 * @param u1_blk_wd         Width of the whole block
 * @param u1_blk_ht         Height of the whole block
 * @param u1_is_sigma_pointer_size_32_bit
 *                          Non-zero when the output arrays hold U32 entries,
 *                          zero when they hold ULWORD64 entries
 * @param u1_array_stride   Row stride, in entries, of both output arrays
 */
void hme_compute_sigmaX_and_sigmaXSquared(
    U08 *pu1_data,
    S32 i4_buf_stride,
    void *pv_sigmaX,
    void *pv_sigmaXSquared,
    U08 u1_base_blk_wd,
    U08 u1_base_blk_ht,
    U08 u1_blk_wd,
    U08 u1_blk_ht,
    U08 u1_is_sigma_pointer_size_32_bit,
    U08 u1_array_stride)
{
    U08 u1_blk_y, u1_blk_x, u1_pix_y, u1_pix_x;

    U08 u1_blks_per_row = u1_blk_wd / u1_base_blk_wd;
    U08 u1_blks_per_column = u1_blk_ht / u1_base_blk_ht;

    if(!u1_is_sigma_pointer_size_32_bit)
    {
        /* 64-bit outputs; the squared sum is scaled by the block area */
        ULWORD64 *pu8_out_sum = (ULWORD64 *)pv_sigmaX;
        ULWORD64 *pu8_out_sum_sq = (ULWORD64 *)pv_sigmaXSquared;

        for(u1_blk_y = 0; u1_blk_y < u1_blks_per_column; u1_blk_y++)
        {
            for(u1_blk_x = 0; u1_blk_x < u1_blks_per_row; u1_blk_x++)
            {
                U08 *pu1_blk = pu1_data + (u1_base_blk_wd * u1_blk_x) +
                               (u1_base_blk_ht * u1_blk_y * i4_buf_stride);
                ULWORD64 u8_sum = 0;
                ULWORD64 u8_sum_sq = 0;

                for(u1_pix_y = 0; u1_pix_y < u1_base_blk_ht; u1_pix_y++)
                {
                    for(u1_pix_x = 0; u1_pix_x < u1_base_blk_wd; u1_pix_x++)
                    {
                        U08 u1_pixel = pu1_blk[u1_pix_x + u1_pix_y * i4_buf_stride];

                        u8_sum += u1_pixel;
                        u8_sum_sq += (u1_pixel * u1_pixel);
                    }
                }

                pu8_out_sum[u1_blk_x + u1_blk_y * u1_array_stride] = u8_sum;
                pu8_out_sum_sq[u1_blk_x + u1_blk_y * u1_array_stride] =
                    u8_sum_sq * u1_blk_wd * u1_blk_ht;
            }
        }
    }
    else
    {
        /* 32-bit outputs; raw sums are stored without any scaling */
        U32 *pu4_out_sum = (U32 *)pv_sigmaX;
        U32 *pu4_out_sum_sq = (U32 *)pv_sigmaXSquared;

        for(u1_blk_y = 0; u1_blk_y < u1_blks_per_column; u1_blk_y++)
        {
            for(u1_blk_x = 0; u1_blk_x < u1_blks_per_row; u1_blk_x++)
            {
                U08 *pu1_blk = pu1_data + (u1_base_blk_wd * u1_blk_x) +
                               (u1_base_blk_ht * u1_blk_y * i4_buf_stride);
                U32 u4_sum = 0;
                U32 u4_sum_sq = 0;

                for(u1_pix_y = 0; u1_pix_y < u1_base_blk_ht; u1_pix_y++)
                {
                    for(u1_pix_x = 0; u1_pix_x < u1_base_blk_wd; u1_pix_x++)
                    {
                        U08 u1_pixel = pu1_blk[u1_pix_x + u1_pix_y * i4_buf_stride];

                        u4_sum += u1_pixel;
                        u4_sum_sq += (u1_pixel * u1_pixel);
                    }
                }

                pu4_out_sum[u1_blk_x + u1_blk_y * u1_array_stride] = u4_sum;
                pu4_out_sum_sq[u1_blk_x + u1_blk_y * u1_array_stride] = u4_sum_sq;
            }
        }
    }
}
5210 
5211 #if TEMPORAL_NOISE_DETECT
ihevce_16x16block_temporal_noise_detect(WORD32 had_block_size,WORD32 ctb_width,WORD32 ctb_height,ihevce_ctb_noise_params * ps_ctb_noise_params,fpel_srch_cand_init_data_t * s_proj_srch_cand_init_data,hme_search_prms_t * s_search_prms_blk,me_frm_ctxt_t * ps_ctxt,WORD32 num_pred_dir,WORD32 i4_num_act_ref_l0,WORD32 i4_num_act_ref_l1,WORD32 i4_cu_x_off,WORD32 i4_cu_y_off,wgt_pred_ctxt_t * ps_wt_inp_prms,WORD32 input_stride,WORD32 index_8x8_block,WORD32 num_horz_blocks,WORD32 num_8x8_in_ctb_row,WORD32 i4_16x16_index)5212 WORD32 ihevce_16x16block_temporal_noise_detect(
5213     WORD32 had_block_size,
5214     WORD32 ctb_width,
5215     WORD32 ctb_height,
5216     ihevce_ctb_noise_params *ps_ctb_noise_params,
5217     fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
5218     hme_search_prms_t *s_search_prms_blk,
5219     me_frm_ctxt_t *ps_ctxt,
5220     WORD32 num_pred_dir,
5221     WORD32 i4_num_act_ref_l0,
5222     WORD32 i4_num_act_ref_l1,
5223     WORD32 i4_cu_x_off,
5224     WORD32 i4_cu_y_off,
5225     wgt_pred_ctxt_t *ps_wt_inp_prms,
5226     WORD32 input_stride,
5227     WORD32 index_8x8_block,
5228     WORD32 num_horz_blocks,
5229     WORD32 num_8x8_in_ctb_row,
5230     WORD32 i4_16x16_index)
5231 {
5232     WORD32 i;
5233     WORD32 noise_detected;
5234 
5235     UWORD8 *pu1_l0_block;
5236     UWORD8 *pu1_l1_block;
5237 
5238     WORD32 mean;
5239     UWORD32 variance_8x8;
5240 
5241     /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
5242     WORD16 pi2_residue_16x16[256];
5243     WORD32 mean_16x16;
5244     UWORD32 variance_16x16[2];
5245 
5246     /* throw errors in case of un- supported arguments */
5247     /* assumptions size is 8 or 16 or 32 */
5248     assert(
5249         (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32));  //ihevc_assert
5250 
5251     /* initialize the variables */
5252     noise_detected = 0;
5253     variance_8x8 = 0;
5254 
5255     mean = 0;
5256 
5257     {
5258         i = 0;
5259         /* get the ref/pred and source using the MV of both directions */
5260         /* pick the best candidates in each direction */
5261         /* Colocated cands */
5262         {
5263             // steps to be done
5264             /* pick the candidates */
5265             /* do motion compoensation using the candidates got from prev step : pick from the offset */
5266             /* get the ref or the pred from the offset*/
5267             /* get the source data */
5268             /* send the pred - source to noise detect */
5269             /* do noise detect on the residue of source and pred */
5270 
5271             layer_mv_t *ps_layer_mvbank;
5272             hme_mv_t *ps_mv;
5273 
5274             //S32 i;
5275             S32 wd_c, ht_c, wd_p, ht_p;
5276             S32 blksize_p, blk_x, blk_y, i4_offset;
5277             S08 *pi1_ref_idx;
5278             fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data;
5279             layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer;
5280             layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer;
5281             err_prms_t s_err_prms;
5282             S32 i4_blk_wd;
5283             S32 i4_blk_ht;
5284             BLK_SIZE_T e_blk_size;
5285             hme_search_prms_t *ps_search_prms;
5286             S32 i4_part_mask;
5287             S32 *pi4_valid_part_ids;
5288 
5289             /* has list of valid partition to search terminated by -1 */
5290             S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
5291 
5292             /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/
5293 
5294             S32 i4_pos_x;
5295             S32 i4_pos_y;
5296             U08 u1_pred_dir;  // = ps_ctxt_2->u1_pred_dir;
5297             U08 u1_default_ref_id = 0;  //ps_ctxt_2->u1_default_ref_id;
5298             S32 i4_inp_off, i4_ref_offset, i4_ref_stride;
5299 
5300             /* The reference is actually an array of ptrs since there are several    */
5301             /* reference id. So an array gets passed form calling function           */
5302             U08 **ppu1_ref;
5303 
5304             /* Atributes of input candidates */
5305             search_node_t as_search_node[2];
5306             wgt_pred_ctxt_t *ps_wt_inp_prms;
5307 
5308             S32 posx;
5309             S32 posy;
5310             S32 i4_num_results_to_proj;
5311             S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
5312             S32 i4_inp_stride;
5313 
5314             /* intialize variables */
5315             /* Width and ht of current and prev layers */
5316             wd_c = ps_curr_layer->i4_wd;
5317             ht_c = ps_curr_layer->i4_ht;
5318             wd_p = ps_coarse_layer->i4_wd;
5319             ht_p = ps_coarse_layer->i4_ht;
5320 
5321             ps_search_prms = s_search_prms_blk;
5322 
5323             ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
5324             e_blk_size = ps_search_prms->e_blk_size;
5325             i4_part_mask = ps_search_prms->i4_part_mask;
5326 
5327             i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
5328             i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
5329 
5330             ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
5331             blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
5332 
5333             /* ASSERT for valid sizes */
5334             ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
5335 
5336             i4_pos_x = i4_cu_x_off;
5337             i4_pos_y = i4_cu_y_off;
5338             posx = i4_pos_x + 2;
5339             posy = i4_pos_y + 2;
5340 
5341             i4_inp_stride = ps_search_prms->i4_inp_stride;
5342             /* Move to the location of the search blk in inp buffer */
5343             //i4_inp_off = i4_cu_x_off;
5344             //i4_inp_off += i4_cu_y_off * i4_inp_stride;
5345             i4_inp_off = (i4_16x16_index % 4) * 16;
5346             i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;
5347 
5348             /***********pick the candidates**************************************/
5349             for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
5350             {
5351                 WORD32 actual_pred_dir = 0;
5352 
5353                 if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0)
5354                 {
5355                     actual_pred_dir = 1;
5356                 }
5357                 else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0)
5358                 {
5359                     actual_pred_dir = 0;
5360                 }
5361                 else if(u1_pred_dir == 1)
5362                 {
5363                     actual_pred_dir = 1;
5364                 }
5365 
5366                 i4_num_results_to_proj = 1;  // only the best proj
5367 
5368                 /* Safety check to avoid uninitialized access across temporal layers */
5369                 posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position withing frAME */
5370                 posy = CLIP3(posy, 0, (ht_c - blksize_p));
5371 
5372                 /* Project the positions to prev layer */
5373                 blk_x = posx >> blksize_p;
5374                 blk_y = posy >> blksize_p;
5375 
5376                 /* Pick up the mvs from the location */
5377                 i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
5378                 i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
5379 
5380                 ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
5381                 pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
5382 
5383                 if(actual_pred_dir == 1)
5384                 {
5385                     ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
5386                     pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
5387                 }
5388 
5389                 {
5390                     as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1;
5391                     as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1;
5392                     as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0];
5393 
5394                     if((as_search_node[actual_pred_dir].i1_ref_idx < 0) ||
5395                        (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV))
5396                     {
5397                         as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id;
5398                         as_search_node[actual_pred_dir].s_mv.i2_mvx = 0;
5399                         as_search_node[actual_pred_dir].s_mv.i2_mvy = 0;
5400                     }
5401                 }
5402 
5403                 /********************************************************************************************/
5404                 {
5405                     /* declare the variables */
5406                     //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
5407 
5408                     pi4_valid_part_ids = ai4_valid_part_ids;
5409                     i4_ref_stride = ps_curr_layer->i4_rec_stride;
5410                     s_err_prms.i4_inp_stride = i4_inp_stride;
5411                     s_err_prms.i4_ref_stride = i4_ref_stride;
5412                     s_err_prms.i4_part_mask = i4_part_mask;
5413                     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
5414                     s_err_prms.i4_blk_wd = i4_blk_wd;
5415                     s_err_prms.i4_blk_ht = i4_blk_ht;
5416                     s_err_prms.i4_step = 1;
5417                     s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
5418                     //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
5419 
5420                     /*************************************************************************/
5421                     /* Depending on flag i4_use_rec, we use either input of previously       */
5422                     /* encoded pictures or we use recon of previously encoded pictures.      */
5423                     i4_ref_stride = ps_curr_layer->i4_rec_stride;
5424                     ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy;  // pointer to the pred
5425 
5426                     i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off;  //i4_x_off;
5427 
5428                     s_err_prms.pu1_ref =
5429                         ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset;
5430                     s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx;
5431                     s_err_prms.pu1_ref +=
5432                         as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride;
5433 
5434                     /*get the source */
5435                     s_err_prms.pu1_inp =
5436                         ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] +
5437                         i4_inp_off;  //pu1_src_input + i4_inp_off;//ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + i4_inp_off;
5438 
5439                     /* send the pred - source to noise detect */
5440                     // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref);
5441                 }
5442                 /* change the l0/l1 blcok pointer names accrodingle */
5443 
5444                 /* get memory pointers the input and the reference */
5445                 pu1_l0_block = s_err_prms.pu1_inp;
5446                 pu1_l1_block = s_err_prms.pu1_ref;
5447 
5448                 {
5449                     WORD32 i2, j2;
5450                     WORD32 dim = 16;
5451                     UWORD8 *buf1;
5452                     UWORD8 *buf2;
5453                     for(i2 = 0; i2 < dim; i2++)
5454                     {
5455                         buf1 = pu1_l0_block + i2 * i4_inp_stride;
5456                         buf2 = pu1_l1_block + i2 * i4_ref_stride;
5457 
5458                         for(j2 = 0; j2 < dim; j2++)
5459                         {
5460                             pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]);
5461                         }
5462                     }
5463 
5464                     ihevce_calc_variance_signed(
5465                         pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);
5466 
5467                     /* compare the source and residue variance for this block ps_ctb_noise_params->i4_variance_src_16x16 */
5468                     if(variance_16x16[u1_pred_dir] >
5469                        ((TEMPORAL_VARIANCE_FACTOR *
5470                          ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
5471                         Q_TEMPORAL_VARIANCE_FACTOR))
5472                     {
5473                         /* update noisy block count only if all  best MV in diff directions indicates noise */
5474                         if(u1_pred_dir == num_pred_dir - 1)
5475                         {
5476                             ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
5477                             ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
5478                             ps_ctb_noise_params
5479                                 ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
5480                             ps_ctb_noise_params
5481                                 ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1;
5482                             noise_detected = 1;
5483                         }
5484                     }
5485                     else /* if any one of the direction mv says it as non noise then dont check for the other directions MV , move for next block*/
5486                     {
5487                         noise_detected = 0;
5488                         ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
5489                         ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
5490                         ps_ctb_noise_params
5491                             ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
5492                         ps_ctb_noise_params
5493                             ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0;
5494                         break;
5495                     }
5496                 }  // variance analysis and calculation
5497             }  // for each direction
5498         }  // HME code
5499 
5500     }  // for each 16x16 block
5501 
5502     return (noise_detected);
5503 }
5504 #endif
5505 
/**
 * Generates one quarter-pel block by averaging two blocks picked from the
 * reference planes in ps_prms->ppu1_ref.
 *
 * @param ps_prms           Interpolation parameters: reference plane
 *                          pointers and stride, block width/height, output
 *                          buffers and output stride.
 * @param i4_mv_x           Horizontal mv in quarter-pel units.
 * @param i4_mv_y           Vertical mv in quarter-pel units.
 * @param i4_buf_id         Index of the output buffer in
 *                          ps_prms->apu1_interp_out that receives the
 *                          result; also the slot written in the two output
 *                          arrays below.
 * @param ppu1_final        [out] ppu1_final[i4_buf_id] is set to the output
 *                          buffer holding the averaged block.
 * @param pi4_final_stride  [out] pi4_final_stride[i4_buf_id] is set to
 *                          ps_prms->i4_out_stride.
 */
void hme_qpel_interp_avg_1pt(
    interp_prms_t *ps_prms,
    S32 i4_mv_x,
    S32 i4_mv_y,
    S32 i4_buf_id,
    U08 **ppu1_final,
    S32 *pi4_final_stride)
{
    /*************************************************************************/
    /* The two source points needed for a given QPEL point are looked up     */
    /* from a table indexed by the fractional part (low 2 bits) of the mv.   */
    /* i4_mv_x and i4_mv_y are in QPEL units, relative to the (0,0) point of */
    /* the reference blk colocated to the input blk.                         */
    /*    A j E k B                                                          */
    /*    l m n o p                                                          */
    /*    F q G r H                                                          */
    /*    s t u v w                                                          */
    /*    C x I y D                                                          */
    /* A, B, C, D are full pel points at offsets (0,0),(1,0),(0,1),(1,1) in  */
    /* the fxfy buffer (buf id 0).                                           */
    /* E and I are hxfy points at offsets (0,0),(0,1) in hxfy buf (id 1).    */
    /* F and H are fxhy points at offsets (0,0),(1,0) in fxhy buf (id 2).    */
    /* G is the hxhy point at offset (0,0) in hxhy buf (id 3).               */
    /* All offsets are relative to the motion displaced point in the         */
    /* respective buffer, i.e. A sits at (i4_mv_x >> 2, i4_mv_y >> 2).       */
    /* Example: v has fractional component (3,3) and is the average of H and */
    /* I, so its table entry reads                                           */
    /*     source 1 (H) : offset (1,0), buf id 2                             */
    /*     source 2 (I) : offset (0,1), buf id 1                             */
    /* NOTE (from the original author): for points that are themselves       */
    /* fxfy/hxfy/fxhy/hxhy, "bufid 1 will be -1"; presumably such points     */
    /* are never passed to this function - confirm against the callers.      */
    /*************************************************************************/
    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y & 3][i4_mv_x & 3];

    S32 i4_ref_stride = ps_prms->i4_ref_stride;

    /* Offset of the full-pel motion displaced point inside a ref plane */
    S32 i4_fpel_off = (i4_mv_x >> 2) + (i4_mv_y >> 2) * i4_ref_stride;

    U08 *pu1_first = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1];
    U08 *pu1_second = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2];
    U08 *pu1_out = ps_prms->apu1_interp_out[i4_buf_id];

    /* Move both source pointers to the positions given by the descriptor */
    pu1_first += ps_cfg->i1_buf_xoff1 + i4_fpel_off;
    pu1_first += (ps_cfg->i1_buf_yoff1 * i4_ref_stride);

    pu1_second += ps_cfg->i1_buf_xoff2 + i4_fpel_off;
    pu1_second += (ps_cfg->i1_buf_yoff2 * i4_ref_stride);

    /* Average the two source blocks into the selected output buffer */
    hevc_avg_2d(
        pu1_first,
        pu1_second,
        i4_ref_stride,
        i4_ref_stride,
        ps_prms->i4_blk_wd,
        ps_prms->i4_blk_ht,
        pu1_out,
        ps_prms->i4_out_stride);

    /* Report where the result lives and with which stride */
    ppu1_final[i4_buf_id] = pu1_out;
    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
}
5573 
/**
 * Generates the two quarter-pel blocks that lie one quarter-pel step on
 * either side of (i4_mv_x, i4_mv_y) in the vertical direction.
 *
 * The point at i4_mv_y + 1 is produced first into output buffer 3, then the
 * point at i4_mv_y - 1 into output buffer 1; the matching entries of
 * ppu1_final and pi4_final_stride are filled by hme_qpel_interp_avg_1pt.
 *
 * @param ps_prms           Interpolation parameters, passed through as is.
 * @param i4_mv_x           Horizontal mv of the centre point (qpel units).
 * @param i4_mv_y           Vertical mv of the centre point (qpel units).
 * @param ppu1_final        [out] Entries 3 and 1 receive the block pointers.
 * @param pi4_final_stride  [out] Entries 3 and 1 receive the strides.
 */
void hme_qpel_interp_avg_2pt_vert_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_y_plus = i4_mv_y + 1;
    S32 i4_mv_y_minus = i4_mv_y - 1;

    /* Point at +1 qpel in y goes to output buffer 3 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_plus, 3, ppu1_final, pi4_final_stride);

    /* Point at -1 qpel in y goes to output buffer 1 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_minus, 1, ppu1_final, pi4_final_stride);
}
5581 
/**
 * Generates the two quarter-pel blocks that lie one quarter-pel step on
 * either side of (i4_mv_x, i4_mv_y) in the horizontal direction.
 *
 * The point at i4_mv_x + 1 is produced first into output buffer 2, then the
 * point at i4_mv_x - 1 into output buffer 0; the matching entries of
 * ppu1_final and pi4_final_stride are filled by hme_qpel_interp_avg_1pt.
 *
 * @param ps_prms           Interpolation parameters, passed through as is.
 * @param i4_mv_x           Horizontal mv of the centre point (qpel units).
 * @param i4_mv_y           Vertical mv of the centre point (qpel units).
 * @param ppu1_final        [out] Entries 2 and 0 receive the block pointers.
 * @param pi4_final_stride  [out] Entries 2 and 0 receive the strides.
 */
void hme_qpel_interp_avg_2pt_horz_with_reuse(
    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
{
    S32 i4_mv_x_plus = i4_mv_x + 1;
    S32 i4_mv_x_minus = i4_mv_x - 1;

    /* Point at +1 qpel in x goes to output buffer 2 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x_plus, i4_mv_y, 2, ppu1_final, pi4_final_stride);

    /* Point at -1 qpel in x goes to output buffer 0 */
    hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x_minus, i4_mv_y, 0, ppu1_final, pi4_final_stride);
}
5589 
/**
 * Fills the per-reference mv search range for the current layer.
 *
 * When ps_ctxt->s_frm_prms.bidir_enabled is 1, the vertical range of each of
 * the first u1_num_act_ref_pics references is scaled by the absolute POC
 * distance between the current picture and that reference, and capped at
 * ps_curr_layer->i2_max_mv_y. The horizontal range is always the full
 * +/- ps_curr_layer->i2_max_mv_x.
 *
 * Otherwise every one of the u1_num_active_ref_l0 references gets the full
 * +/- i2_max_mv_x / i2_max_mv_y range (u1_num_active_ref_l1 is asserted to
 * be 0 on this path).
 *
 * @param ps_ctxt                    ME frame context (frame params, current
 *                                   and previous POC, ref idx to POC map).
 * @param ps_curr_layer              Layer supplying i2_max_mv_x/i2_max_mv_y.
 * @param ps_mv_limit                [out] One range entry per reference.
 * @param pi2_prev_enc_frm_max_mv_y  [out] Element 0 receives the vertical
 *                                   limit of the reference whose POC equals
 *                                   ps_ctxt->i4_prev_poc (0 if no reference
 *                                   matches; the last match wins) on the
 *                                   bidir path, or i2_max_mv_y otherwise.
 * @param u1_num_act_ref_pics        Number of references processed on the
 *                                   bidir path.
 */
void hme_set_mv_limit_using_dvsr_data(
    me_frm_ctxt_t *ps_ctxt,
    layer_ctxt_t *ps_curr_layer,
    range_prms_t *ps_mv_limit,
    S16 *pi2_prev_enc_frm_max_mv_y,
    U08 u1_num_act_ref_pics)
{
    WORD32 ref_ctr;

    /* Only for B/b pic. */
    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
    {
        WORD16 i2_mv_y_per_poc, i2_max_mv_y;
        WORD32 cur_poc, prev_poc, ref_poc, abs_poc_diff;
        WORD32 i4_scaled_max_mv_y;

        pi2_prev_enc_frm_max_mv_y[0] = 0;

        cur_poc = ps_ctxt->i4_curr_poc;

        /* POC of the prev. encoded frame; same for every reference */
        prev_poc = ps_ctxt->i4_prev_poc;

        /* Get abs MAX for symmetric search */
        i2_mv_y_per_poc = ps_curr_layer->i2_max_mv_y;
        /* Assuming P to P distance as 4 */
        i2_mv_y_per_poc = (i2_mv_y_per_poc + 2) >> 2;

        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
        {
            ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
            abs_poc_diff = ABS((cur_poc - ref_poc));

            /* Get the cur. max MV based on POC distance. The product is    */
            /* kept in 32 bits and clamped BEFORE narrowing to 16 bits: for */
            /* a large POC distance the product exceeds the WORD16 range,   */
            /* and narrowing first could wrap it to a small or negative     */
            /* value that would then wrongly win the MIN below.             */
            i4_scaled_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
            i4_scaled_max_mv_y = MIN(i4_scaled_max_mv_y, ps_curr_layer->i2_max_mv_y);
            i2_max_mv_y = (WORD16)i4_scaled_max_mv_y;

            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;

            /* Find the MAX MV for the prev. encoded frame to optimize */
            /* the reverse dependency of ME on Enc.Loop                */
            if(ref_poc == prev_poc)
            {
                /* TO DO : Same thing for horz. search also */
                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
            }
        }
    }
    else
    {
        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);

        /* Set the Config. File Params for P pic. */
        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
        {
            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
        }

        /* For P PIC., go with  Config. File Params */
        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
    }
}
5661 
/**
 * Chooses the partition mask to be evaluated for a block.
 *
 * - If u1_blk_8x8_mask is not 15 (presumably: not all four 8x8 sub-blocks
 *   are flagged - confirm against the caller), only ENABLE_NxN is returned.
 * - Otherwise the mask is derived from the input via
 *   hme_study_input_segmentation(), unless the preset is ME_XTREME_SPEED_25
 *   and the DISABLE_8X8CUS_IN_*_IN_P6 switch that applies to the picture
 *   type (P, reference B, non-reference B) is non-zero; in that case
 *   ENABLE_2Nx2N is returned.
 * - For ME_XTREME_SPEED the ENABLE_AMP bits are removed from the derived
 *   mask; for ME_XTREME_SPEED_25 both ENABLE_AMP and ENABLE_SMP bits are
 *   removed.
 *
 * @param pu1_inp                     Input block, forwarded to
 *                                    hme_study_input_segmentation().
 * @param i4_inp_stride               Stride of pu1_inp.
 * @param u1_limit_active_partitions  Forwarded to
 *                                    hme_study_input_segmentation().
 * @param u1_is_bPic                  Non-zero for a B picture.
 * @param u1_is_refPic                Non-zero for a reference picture.
 * @param u1_blk_8x8_mask             8x8 block mask; 15 selects the full
 *                                    decision path.
 * @param e_me_quality_preset         ME quality preset.
 *
 * @return Partition mask built from the ENABLE_* flags.
 */
S32 hme_part_mask_populator(
    U08 *pu1_inp,
    S32 i4_inp_stride,
    U08 u1_limit_active_partitions,
    U08 u1_is_bPic,
    U08 u1_is_refPic,
    U08 u1_blk_8x8_mask,
    ME_QUALITY_PRESETS_T e_me_quality_preset)
{
    U08 u1_use_inp_segmentation;
    S32 i4_mask;

    if(15 != u1_blk_8x8_mask)
    {
        return ENABLE_NxN;
    }

    /* Decide whether the input segmentation study is to be run at all */
    if(ME_XTREME_SPEED_25 != e_me_quality_preset)
    {
        u1_use_inp_segmentation = 1;
    }
    else if(!u1_is_bPic)
    {
        /* P pic */
        u1_use_inp_segmentation = (!DISABLE_8X8CUS_IN_PPICS_IN_P6) ? 1 : 0;
    }
    else if(u1_is_refPic)
    {
        /* Reference B pic */
        u1_use_inp_segmentation = (!DISABLE_8X8CUS_IN_REFBPICS_IN_P6) ? 1 : 0;
    }
    else
    {
        /* Non reference B pic */
        u1_use_inp_segmentation = (!DISABLE_8X8CUS_IN_NREFBPICS_IN_P6) ? 1 : 0;
    }

    if(!u1_use_inp_segmentation)
    {
        return ENABLE_2Nx2N;
    }

    i4_mask = hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);

    /* Faster presets drop the asymmetric (and symmetric) partitions */
    if(ME_XTREME_SPEED == e_me_quality_preset)
    {
        i4_mask &= ~ENABLE_AMP;
    }

    if(ME_XTREME_SPEED_25 == e_me_quality_preset)
    {
        i4_mask &= ~ENABLE_AMP;
        i4_mask &= ~ENABLE_SMP;
    }

    return i4_mask;
}
5708