1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /**
21 ******************************************************************************
22 * @file hme_search_algo.c
23 *
24 * @brief
25 *    Contains various search algorithms to be used by coarse/refinement layers
26 *
27 * @author
28 *    Ittiam
29 *
30 *
31 * List of Functions
32 * hme_compute_grid_results_step_gt_1()
33 * hme_compute_grid_results_step_1()
34 * hme_pred_search_square_stepn()
35 *
36 ******************************************************************************
37 */
38 
39 /*****************************************************************************/
40 /* File Includes                                                             */
41 /*****************************************************************************/
42 /* System include files */
43 #include <stdio.h>
44 #include <string.h>
45 #include <stdlib.h>
46 #include <assert.h>
47 #include <stdarg.h>
48 #include <math.h>
49 #include <limits.h>
50 
51 /* User include files */
52 #include "ihevc_typedefs.h"
53 #include "itt_video_api.h"
54 #include "ihevce_api.h"
55 
56 #include "rc_cntrl_param.h"
57 #include "rc_frame_info_collector.h"
58 #include "rc_look_ahead_params.h"
59 
60 #include "ihevc_defs.h"
61 #include "ihevc_structs.h"
62 #include "ihevc_platform_macros.h"
63 #include "ihevc_deblk.h"
64 #include "ihevc_itrans_recon.h"
65 #include "ihevc_chroma_itrans_recon.h"
66 #include "ihevc_chroma_intra_pred.h"
67 #include "ihevc_intra_pred.h"
68 #include "ihevc_inter_pred.h"
69 #include "ihevc_mem_fns.h"
70 #include "ihevc_padding.h"
71 #include "ihevc_weighted_pred.h"
72 #include "ihevc_sao.h"
73 #include "ihevc_resi_trans.h"
74 #include "ihevc_quant_iquant_ssd.h"
75 #include "ihevc_cabac_tables.h"
76 
77 #include "ihevce_defs.h"
78 #include "ihevce_lap_enc_structs.h"
79 #include "ihevce_multi_thrd_structs.h"
80 #include "ihevce_multi_thrd_funcs.h"
81 #include "ihevce_me_common_defs.h"
82 #include "ihevce_had_satd.h"
83 #include "ihevce_error_codes.h"
84 #include "ihevce_bitstream.h"
85 #include "ihevce_cabac.h"
86 #include "ihevce_rdoq_macros.h"
87 #include "ihevce_function_selector.h"
88 #include "ihevce_enc_structs.h"
89 #include "ihevce_entropy_structs.h"
90 #include "ihevce_cmn_utils_instr_set_router.h"
91 #include "ihevce_enc_loop_structs.h"
92 #include "ihevce_bs_compute_ctb.h"
93 #include "ihevce_global_tables.h"
94 #include "ihevce_dep_mngr_interface.h"
95 #include "hme_datatype.h"
96 #include "hme_interface.h"
97 #include "hme_common_defs.h"
98 #include "hme_defs.h"
99 #include "ihevce_me_instr_set_router.h"
100 #include "hme_globals.h"
101 #include "hme_utils.h"
102 #include "hme_coarse.h"
103 #include "hme_fullpel.h"
104 #include "hme_subpel.h"
105 #include "hme_refine.h"
106 #include "hme_err_compute.h"
107 #include "hme_common_utils.h"
108 #include "hme_search_algo.h"
109 #include "ihevce_stasino_helpers.h"
110 #include "ihevce_common_utils.h"
111 
112 /*****************************************************************************/
113 /* Function Definitions                                                      */
114 /*****************************************************************************/
115 
116 /**
117 ********************************************************************************
118 *  @fn     void hme_compute_grid_results_step_1(err_prms_t *ps_err_prms,
119 result_upd_prms_t *ps_result_prms,
120 BLK_SIZE_T e_blk_size)
121 *
122 *  @brief  Updates results for a grid of step = 1
123 *
124 *  @param[in] ps_err_prms: Various parameters to this function
125 *
126 *  @param[in] ps_result_prms : Parameters pertaining to result updation
127 *
128 *  @param[out] e_blk_size: Block size of the blk being searched for
129 *
130 *  @return none
131 ********************************************************************************
132 */
hme_compute_grid_results(err_prms_t * ps_err_prms,result_upd_prms_t * ps_result_prms,BLK_SIZE_T e_blk_size)133 void hme_compute_grid_results(
134     err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
135 {
136     PF_RESULT_FXN_T pf_hme_result_fxn;
137     PF_SAD_FXN_T pf_sad_fxn;
138     S32 i4_num_results;
139     S32 part_id;
140 
141     part_id = ps_result_prms->pi4_valid_part_ids[0];
142 
143     i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;
144 
145     pf_sad_fxn = hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);
146 
147     pf_hme_result_fxn =
148         hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);
149 
150     pf_sad_fxn(ps_err_prms);
151     pf_hme_result_fxn(ps_result_prms);
152 }
153 
154 /**
155 ********************************************************************************
156 *  @fn     void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
157 *                                   layer_ctxt_t *ps_layer_ctxt)
158 *
159 *  @brief  Implements predictive search, with square grid refinement. In this
160 *          case, we start with a bigger step size, like 4, refining upto a
161 *          variable number of pts, till we hit end of search range or hit a
162 *          minima. Then we refine using smaller steps. The bigger step size
163 *          like 4 or 2, do not use optimized SAD functions, they evaluate
164 *          SAD for each individual pt.
165 *
166 *  @param[in,out]  ps_search_prms: All the params to this function
167 *
168 *  @param[in] ps_layer_ctxt: Context for the layer
169 *
170 *  @return None
171 ********************************************************************************
172 */
hme_pred_search_square_stepn(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,ME_QUALITY_PRESETS_T e_me_quality_preset,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)173 void hme_pred_search_square_stepn(
174     hme_search_prms_t *ps_search_prms,
175     layer_ctxt_t *ps_layer_ctxt,
176     wgt_pred_ctxt_t *ps_wt_inp_prms,
177     ME_QUALITY_PRESETS_T e_me_quality_preset,
178     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
179 
180 )
181 {
182     /* Stores the SAD for all parts at each pt in the grid */
183     S32 ai4_sad_grid[9][TOT_NUM_PARTS];
184 
185     S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
186 
187     /* Atributes of input candidates */
188     search_candt_t *ps_search_candts;
189     search_node_t s_search_node;
190 
191     /* Number of candidates to search */
192     S32 i4_num_candts, max_num_iters, i4_num_results;
193 
194     /* Input and reference attributes */
195     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
196 
197     /* The reference is actually an array of ptrs since there are several    */
198     /* reference id. So an array gets passed form calling function           */
199     U08 **ppu1_ref;
200 
201     /* Holds the search results at the end of this fxn */
202     search_results_t *ps_search_results;
203 
204     /* These control number of parts and number of pts in grid to search */
205     S32 i4_part_mask, i4_grid_mask;
206 
207     /* Blk width, blk height and blk size are derived from input params */
208     BLK_SIZE_T e_blk_size;
209     CU_SIZE_T e_cu_size;
210     S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
211     S32 i4_inp_off;
212     S32 i4_min_id;
213     /* Points to the range limits for mv */
214     range_prms_t *ps_range_prms;
215 
216     /*************************************************************************/
217     /* These functions pointers for calculating Err and the result update    */
218     /* Each carries its own parameters structure, which is generated on the  */
219     /* fly in this function                                                  */
220     /*************************************************************************/
221     err_prms_t s_err_prms;
222     result_upd_prms_t s_result_prms;
223 
224     max_num_iters = ps_search_prms->i4_max_iters;
225     /* Using the member 0 to store for all ref. idx., see in coarsest */
226     ps_range_prms = ps_search_prms->aps_mv_range[0];
227     i4_inp_stride = ps_search_prms->i4_inp_stride;
228     /* Move to the location of the search blk in inp buffer */
229     i4_inp_off = ps_search_prms->i4_cu_x_off;
230     i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);
231 
232     ps_search_results = ps_search_prms->ps_search_results;
233 
234     /*************************************************************************/
235     /* Depending on flag i4_use_rec, we use either input of previously       */
236     /* encoded pictures or we use recon of previously encoded pictures.      */
237     /*************************************************************************/
238     if(ps_search_prms->i4_use_rec == 1)
239     {
240         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
241         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
242     }
243     else
244     {
245         i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
246         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
247     }
248     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
249 
250     /*************************************************************************/
251     /* Obtain the blk size of the search blk. Assumed here that the search   */
252     /* is done on a CU size, rather than any arbitrary blk size.             */
253     /*************************************************************************/
254     ps_search_results = ps_search_prms->ps_search_results;
255     e_blk_size = ps_search_prms->e_blk_size;
256     i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
257     i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
258     e_cu_size = ps_search_results->e_cu_size;
259     i4_num_results = (S32)ps_search_results->u1_num_results_per_part;
260 
261     ps_search_candts = ps_search_prms->ps_search_candts;
262     i4_num_candts = ps_search_prms->i4_num_init_candts;
263     i4_part_mask = ps_search_prms->i4_part_mask;
264 
265     /*************************************************************************/
266     /* This array stores the ids of the partitions whose                     */
267     /* SADs are updated. Since the partitions whose SADs are updated may not */
268     /* be in contiguous order, we supply another level of indirection.       */
269     /*************************************************************************/
270     hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
271 
272     /* Update the parameters used to pass to SAD */
273     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
274     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
275     /* varying params which are updated just before calling fxn  */
276     s_err_prms.i4_inp_stride = i4_inp_stride;
277     s_err_prms.i4_ref_stride = i4_ref_stride;
278     s_err_prms.i4_part_mask = i4_part_mask;
279     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
280     s_err_prms.i4_blk_wd = i4_blk_wd;
281     s_err_prms.i4_blk_ht = i4_blk_ht;
282     s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;
283 
284     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
285     s_result_prms.ps_search_results = ps_search_results;
286     s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
287     s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
288     s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
289     s_result_prms.ps_search_node_base = &s_search_node;
290     s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
291 
292     /* Run through each of the candts in a loop */
293     for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
294     {
295         S32 i4_num_refine;
296 
297         i4_step = ps_search_prms->i4_start_step;
298 
299         s_search_node = *(ps_search_candts->ps_search_node);
300 
301         /* initialize minimum cost for this candidate. As we search around */
302         /* this candidate, this is used to check early exit, when in any   */
303         /* given iteration, the center pt of the grid is lowest value      */
304         s_result_prms.i4_min_cost = MAX_32BIT_VAL;
305 
306         /* If we need to do refinements, then we need to evaluate */
307         /* neighbouring pts. Before doing so, we have to do       */
308         /* basic range checks against max allowed mvs             */
309         i4_num_refine = ps_search_candts->u1_num_steps_refine;
310 
311         CLIP_MV_WITHIN_RANGE(
312             s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);
313 
314         /* The first time, we search all 8 pts around init candt plus the init candt */
315         i4_grid_mask = 0x1ff;
316         s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;
317 
318         for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
319         {
320             i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
321 
322             s_err_prms.i4_grid_mask = i4_grid_mask;
323             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
324             s_err_prms.pu1_ref +=
325                 (s_search_node.s_mv.i2_mvx +
326                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
327 
328             s_result_prms.i4_step = i4_step;
329             s_err_prms.i4_step = i4_step;
330             s_result_prms.i4_grid_mask = i4_grid_mask;
331 
332             /* For Top,TopLeft and Left cand., get only center point SAD    */
333             /* and do early exit                                            */
334             if(0 == i4_num_refine)
335             {
336                 s_err_prms.i4_grid_mask = 0x1;
337                 s_result_prms.i4_grid_mask = 0x1;
338 
339                 /* sad pt fun. populates sad to 0th location, whereas update */
340                 /* fun. takes it based on part. id                           */
341                 s_err_prms.pi4_sad_grid =
342                     s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);
343 
344                 ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);
345 
346                 s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;
347 
348                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
349                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
350                 else
351                     hme_update_results_grid_pu_bestn(&s_result_prms);
352 
353                 i4_min_id = (S32)PT_C; /* Center Point         */
354                 i4_step = 0; /* No further refinment */
355                 s_result_prms.i4_step = i4_step;
356                 s_err_prms.i4_step = i4_step;
357             }
358             else
359             {
360                 if(ME_XTREME_SPEED_25 == e_me_quality_preset)
361                 {
362                     err_prms_t *ps_err_prms = &s_err_prms;
363                     ASSERT(ps_err_prms->i4_grid_mask != 1);
364                     ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
365 
366                     /*****************************************************************/
367                     /* In this case, there are no partial updates. The blk can be    */
368                     /* of any type and need not be a CU. The only thing that matters */
369                     /* here is the width of the blk, 4/8/(>=16)                      */
370                     /*****************************************************************/
371                     ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
372 
373                     hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
374                 }
375                 else
376                 {
377                     /* Obtain SAD for all 9 pts in grid*/
378                     hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
379                 }
380 
381                 /* Early exit in case of centre being local minima */
382                 i4_min_id = s_result_prms.i4_min_id;
383             }
384 
385             i4_grid_mask = gai4_opt_grid_mask[i4_min_id];
386 
387             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
388             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
389             if(i4_min_id == (S32)PT_C)
390                 break;
391         }
392 
393         /* Next keep reducing stepsize by factor of 2 */
394         i4_step >>= 1;
395         while(i4_step)
396         {
397             i4_grid_mask = 0x1fe &
398                            hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
399             //i4_grid_mask &= 0x1fe;
400 
401             s_err_prms.i4_grid_mask = i4_grid_mask;
402             s_result_prms.i4_grid_mask = i4_grid_mask;
403             s_err_prms.i4_step = i4_step;
404             s_result_prms.i4_step = i4_step;
405             s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
406             s_err_prms.pu1_ref +=
407                 (s_search_node.s_mv.i2_mvx +
408                  (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
409             if(ME_XTREME_SPEED_25 == e_me_quality_preset)
410             {
411                 err_prms_t *ps_err_prms = &s_err_prms;
412                 ASSERT(ps_err_prms->i4_grid_mask != 1);
413                 ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
414 
415                 /*****************************************************************/
416                 /* In this case, there are no partial updates. The blk can be    */
417                 /* of any type and need not be a CU. The only thing that matters */
418                 /* here is the width of the blk, 4/8/(>=16)                      */
419                 /*****************************************************************/
420                 ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
421 
422                 hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
423             }
424             else
425             {
426                 hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
427             }
428 
429             i4_min_id = s_result_prms.i4_min_id;
430 
431             s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
432             s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
433 
434             i4_step >>= 1;
435         }
436 
437         ps_search_candts++;
438     }
439 }
440 
441 /**
442 ********************************************************************************
443 *  @fn     hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
444 *                               layer_ctxt_t *ps_layer_ctxt)
445 *
446 *  @brief  Implements predictive search with square grid refinement. In this
447 *           case, the square grid is of step 1 always. since this is considered
448 *           to be more of a refinement search
449 *
450 *  @param[in,out]  ps_search_prms: All the params to this function
451 *
452 *  @param[in] ps_layer_ctxt: All info about this layer
453 *
454 *  @return None
455 ********************************************************************************
456 */
457 /**
458 ********************************************************************************
459 *  @fn     hme_pred_search(hme_search_prms_t *ps_search_prms,
460 *                               layer_ctxt_t *ps_layer_ctxt)
461 *
462 *  @brief  Implements predictive search after removing duplicate candidates
463 *          from initial list. Each square grid (of step 1) is expanded
464 *          to nine search pts before the dedeuplication process. one point
465 *          cost is then evaluated for each unique node after the deduplication
466 *          process
467 *
468 *  @param[in,out]  ps_search_prms: All the params to this function
469 *
470 *  @param[in] ps_layer_ctxt: All info about this layer
471 *
472 *  @return None
473 ********************************************************************************
474 */
hme_pred_search(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,S08 i1_grid_flag,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)475 void hme_pred_search(
476     hme_search_prms_t *ps_search_prms,
477     layer_ctxt_t *ps_layer_ctxt,
478     wgt_pred_ctxt_t *ps_wt_inp_prms,
479     S08 i1_grid_flag,
480     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
481 
482 )
483 {
484     /* Stores the SAD for all parts at each pt in the grid */
485     S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
486 
487     /* Atributes of input candidates */
488     search_node_t *ps_search_node;
489 
490     search_results_t *ps_search_results;
491     S32 i4_num_nodes, i4_candt;
492 
493     /* Input and reference attributes */
494     S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
495 
496     /* The reference is actually an array of ptrs since there are several    */
497     /* reference id. So an array gets passed form calling function           */
498     U08 **ppu1_ref;
499 
500     /* These control number of parts and number of pts in grid to search */
501     S32 i4_part_mask, i4_grid_mask;
502 
503     S32 shift_for_cu_size;
504 
505     /* Blk width, blk height and blk size are derived from input params */
506     BLK_SIZE_T e_blk_size;
507     CU_SIZE_T e_cu_size;
508     S32 i4_blk_wd, i4_blk_ht;
509 
510     /*************************************************************************/
511     /* These functions pointers for calculating Err and the result update    */
512     /* Each carries its own parameters structure, which is generated on the  */
513     /* fly in this function                                                  */
514     /*************************************************************************/
515     PF_RESULT_FXN_T pf_hme_result_fxn;
516     PF_SAD_FXN_T pf_sad_fxn;
517     PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
518     err_prms_t s_err_prms;
519     result_upd_prms_t s_result_prms;
520     S32 i4_num_results;
521     S32 i4_inp_off;
522     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
523 
524     i4_inp_stride = ps_search_prms->i4_inp_stride;
525 
526     /* Move to the location of the search blk in inp buffer */
527     i4_inp_off = ps_search_prms->i4_cu_x_off;
528     i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
529 
530     /*************************************************************************/
531     /* Depending on flag i4_use_rec, we use either input of previously       */
532     /* encoded pictures or we use recon of previously encoded pictures.      */
533     /*************************************************************************/
534     if(ps_search_prms->i4_use_rec == 1)
535     {
536         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
537         ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
538     }
539     else
540     {
541         i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
542         ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
543     }
544     i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
545     /* Obtain the blk size of the search blk. Assumed here that the search   */
546     /* is done on a CU size, rather than any arbitrary blk size.             */
547     ps_search_results = ps_search_prms->ps_search_results;
548     e_blk_size = ps_search_prms->e_blk_size;
549     i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
550     i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
551     e_cu_size = ps_search_results->e_cu_size;
552 
553     /* Assuming cu size of 8x8 as enum 0, the other will be 1, 2, 3 */
554     /* This will also set the shift w.r.t. the base cu size of 8x8 */
555     shift_for_cu_size = e_cu_size;
556 
557     ps_search_node = ps_search_prms->ps_search_nodes;
558     i4_num_nodes = ps_search_prms->i4_num_search_nodes;
559     i4_part_mask = ps_search_prms->i4_part_mask;
560 
561     /* Update the parameters used to pass to SAD */
562     /* input ptr, strides, SAD Grid, part mask, blk width and ht */
563     /* The above are fixed ptrs, only pu1_ref and grid mask are  */
564     /* varying params which are updated just before calling fxn  */
565     s_err_prms.i4_inp_stride = i4_inp_stride;
566     s_err_prms.i4_ref_stride = i4_ref_stride;
567     s_err_prms.i4_part_mask = i4_part_mask;
568     s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
569     s_err_prms.i4_blk_wd = i4_blk_wd;
570     s_err_prms.i4_blk_ht = i4_blk_ht;
571     s_err_prms.i4_step = 1;
572     s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
573 
574     s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
575     s_result_prms.ps_search_results = ps_search_results;
576     s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
577     s_result_prms.pi4_sad_grid = ai4_sad_grid;
578     s_result_prms.i4_part_mask = i4_part_mask;
579     s_result_prms.i4_step = 1;
580     pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
581         i1_grid_flag,
582         ps_search_prms->u1_is_cu_noisy,
583         i4_part_mask,
584         ps_fullpel_refine_ctxt->i4_num_valid_parts,
585         ps_search_results->u1_num_results_per_part);
586 
587     pf_calc_sad_and_result(
588         ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
589 }
590 
hme_get_calc_sad_and_result_explicit_fxn(ihevce_me_optimised_function_list_t * ps_me_optimised_function_list,S32 i4_part_mask,S32 i4_num_partitions,S08 i1_grid_enable,U08 u1_num_results_per_part)591 static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
592     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
593     S32 i4_part_mask,
594     S32 i4_num_partitions,
595     S08 i1_grid_enable,
596     U08 u1_num_results_per_part)
597 {
598     FT_CALC_SAD_AND_RESULT *pf_func = NULL;
599 
600     if(2 == u1_num_results_per_part)
601     {
602         if(i4_part_mask == 1)
603         {
604             ASSERT(i4_num_partitions == 1);
605 
606             if(i1_grid_enable == 0)
607             {
608                 pf_func =
609                     ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8;
610             }
611             else
612             {
613                 pf_func = ps_me_optimised_function_list
614                               ->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
615             }
616         }
617         else
618         {
619             ASSERT(i4_num_partitions == 5);
620 
621             pf_func =
622                 ps_me_optimised_function_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
623         }
624     }
625     else if(1 == u1_num_results_per_part)
626     {
627         if(i4_part_mask == 1)
628         {
629             ASSERT(i4_num_partitions == 1);
630 
631             if(i1_grid_enable == 0)
632             {
633                 pf_func =
634                     ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8;
635             }
636             else
637             {
638                 pf_func = ps_me_optimised_function_list
639                               ->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
640             }
641         }
642         else
643         {
644             ASSERT(i4_num_partitions == 5);
645 
646             pf_func =
647                 ps_me_optimised_function_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
648         }
649     }
650 
651     return pf_func;
652 }
653 
654 /**
655 ********************************************************************************
656 *  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
657 *                                         layer_ctxt_t *ps_layer_ctxt,
658 *                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
659 *                                         S32 *pi4_valid_part_ids,
660 *                                         S32 disable_refine,
661 *                                         ME_QUALITY_PRESETS_T e_me_quality_preset)
662 *
663 *  @brief  Implements predictive search after removing duplicate candidates
664 *          from initial list. Each square grid (of step 1) is expanded
*          to nine search pts before the deduplication process. One point
666 *          cost is then evaluated for each unique node after the deduplication
667 *          process
668 *
669 *  @param[in,out]  ps_search_prms: All the params to this function
670 *
671 *  @param[in] ps_layer_ctxt: All info about this layer
672 *
673 *  @return None
674 ********************************************************************************
675 */
void hme_pred_search_no_encode(
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    S32 *pi4_valid_part_ids,
    S32 disable_refine,
    ME_QUALITY_PRESETS_T e_me_quality_preset,
    S08 i1_grid_enable,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
    /*************************************************************************/
    /* Summary of what this function does:                                   */
    /*  1. Picks the reference plane list and stride (recon or input) based  */
    /*     on ps_search_prms->i4_use_rec.                                    */
    /*  2. Expands the partition mask into the caller supplied list          */
    /*     pi4_valid_part_ids.                                               */
    /*  3. Fills the SAD (err) params and the result update params.          */
    /*  4. Selects the combined "calc SAD + update result" routine from      */
    /*     ps_me_optimised_function_list and invokes it exactly once.        */
    /* disable_refine and e_me_quality_preset are part of the interface but  */
    /* are not referenced by this function.                                  */
    /*************************************************************************/

    /* Stores the SAD for all parts at each pt in the grid */
    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];

    /* Result store associated with the search blk */
    search_results_t *ps_search_results;

    /* Input and reference strides */
    S32 i4_inp_stride, i4_ref_stride;

    /* The reference is actually an array of ptrs since there are several    */
    /* reference ids. So an array gets passed from the calling function      */
    U08 **ppu1_ref;

    /* Controls which partitions are evaluated */
    S32 i4_part_mask;
    S32 i4_num_partitions;

    /* Blk width and blk height are derived from the blk size input param */
    BLK_SIZE_T e_blk_size;
    S32 i4_blk_wd, i4_blk_ht;

    /*************************************************************************/
    /* Function pointer for calculating Err and updating the results. The    */
    /* parameter structures it consumes are generated on the fly in this     */
    /* function                                                              */
    /*************************************************************************/
    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
    err_prms_t s_err_prms;
    result_upd_prms_t s_result_prms;

    /* Kept only for interface compatibility; intentionally unused here */
    (void)disable_refine;
    (void)e_me_quality_preset;

    i4_inp_stride = ps_search_prms->i4_inp_stride;

    /*************************************************************************/
    /* Depending on flag i4_use_rec, we use either input of previously       */
    /* encoded pictures or we use recon of previously encoded pictures.      */
    /*************************************************************************/
    if(ps_search_prms->i4_use_rec == 1)
    {
        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
    }
    else
    {
        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
    }

    /* Obtain the blk size of the search blk and translate it to wd / ht */
    ps_search_results = ps_search_prms->ps_search_results;
    e_blk_size = ps_search_prms->e_blk_size;
    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];

    i4_part_mask = ps_search_prms->i4_part_mask;

    /*************************************************************************/
    /* This array stores the ids of the partitions whose                     */
    /* SADs are updated. Since the partitions whose SADs are updated may not */
    /* be in contiguous order, we supply another level of indirection.       */
    /*************************************************************************/
    i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);

    /* Update the parameters used to pass to SAD: strides, SAD grid, part    */
    /* mask, blk width and ht, step and the valid partition list. Fields of  */
    /* s_err_prms not listed below are not written by this function.         */
    s_err_prms.i4_inp_stride = i4_inp_stride;
    s_err_prms.i4_ref_stride = i4_ref_stride;
    s_err_prms.i4_part_mask = i4_part_mask;
    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
    s_err_prms.i4_blk_wd = i4_blk_wd;
    s_err_prms.i4_blk_ht = i4_blk_ht;
    s_err_prms.i4_step = 1;
    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    s_err_prms.i4_num_partitions = i4_num_partitions;

    /* Result update params share the same SAD grid and partition list */
    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
    s_result_prms.ps_search_results = ps_search_results;
    s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
    s_result_prms.pi4_sad_grid = ai4_sad_grid;
    s_result_prms.i4_part_mask = i4_part_mask;
    s_result_prms.i4_step = 1;

    /* Select the routine matching the partition mask / count, grid enable   */
    /* flag and the number of results maintained per partition               */
    pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
        ps_me_optimised_function_list,
        i4_part_mask,
        i4_num_partitions,
        i1_grid_enable,
        ps_search_results->u1_num_results_per_part);

    pf_calc_sad_and_result(
        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
}
799