1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 ******************************************************************************
* @file hme_fullpel.c
24 *
25 * @brief
26 *    Fullpel search and refinement
27 *
28 * @author
29 *    Ittiam
30 *
31 ******************************************************************************
32 */
33 
34 /*****************************************************************************/
35 /* File Includes                                                             */
36 /*****************************************************************************/
37 /* System include files */
38 #include <stdio.h>
39 #include <string.h>
40 #include <stdlib.h>
41 #include <assert.h>
42 #include <stdarg.h>
43 #include <math.h>
44 #include <limits.h>
45 
46 /* User include files */
47 #include "ihevc_typedefs.h"
48 #include "itt_video_api.h"
49 #include "ihevce_api.h"
50 
51 #include "rc_cntrl_param.h"
52 #include "rc_frame_info_collector.h"
53 #include "rc_look_ahead_params.h"
54 
55 #include "ihevc_defs.h"
56 #include "ihevc_structs.h"
57 #include "ihevc_platform_macros.h"
58 #include "ihevc_deblk.h"
59 #include "ihevc_itrans_recon.h"
60 #include "ihevc_chroma_itrans_recon.h"
61 #include "ihevc_chroma_intra_pred.h"
62 #include "ihevc_intra_pred.h"
63 #include "ihevc_inter_pred.h"
64 #include "ihevc_mem_fns.h"
65 #include "ihevc_padding.h"
66 #include "ihevc_weighted_pred.h"
67 #include "ihevc_sao.h"
68 #include "ihevc_resi_trans.h"
69 #include "ihevc_quant_iquant_ssd.h"
70 #include "ihevc_cabac_tables.h"
71 
72 #include "ihevce_defs.h"
73 #include "ihevce_lap_enc_structs.h"
74 #include "ihevce_multi_thrd_structs.h"
75 #include "ihevce_multi_thrd_funcs.h"
76 #include "ihevce_me_common_defs.h"
77 #include "ihevce_had_satd.h"
78 #include "ihevce_error_codes.h"
79 #include "ihevce_bitstream.h"
80 #include "ihevce_cabac.h"
81 #include "ihevce_rdoq_macros.h"
82 #include "ihevce_function_selector.h"
83 #include "ihevce_enc_structs.h"
84 #include "ihevce_entropy_structs.h"
85 #include "ihevce_cmn_utils_instr_set_router.h"
86 #include "ihevce_enc_loop_structs.h"
87 #include "ihevce_bs_compute_ctb.h"
88 #include "ihevce_global_tables.h"
89 #include "ihevce_dep_mngr_interface.h"
90 #include "hme_datatype.h"
91 #include "hme_interface.h"
92 #include "hme_common_defs.h"
93 #include "hme_defs.h"
94 #include "ihevce_me_instr_set_router.h"
95 #include "hme_globals.h"
96 #include "hme_utils.h"
97 #include "hme_coarse.h"
98 #include "hme_refine.h"
99 #include "hme_err_compute.h"
100 #include "hme_common_utils.h"
101 #include "hme_search_algo.h"
102 #include "ihevce_stasino_helpers.h"
103 
104 /**
105 ********************************************************************************
106 *  @fn     hme_fullpel_cand_sifter
107 *
108 *  @brief  Given a list of search candidates and valid partition types,
109 *          this function finds the two best candidates for each partition type.
110 *
111 *  @return None
112 ********************************************************************************
113 */
hme_fullpel_cand_sifter(hme_search_prms_t * ps_search_prms,layer_ctxt_t * ps_layer_ctxt,wgt_pred_ctxt_t * ps_wt_inp_prms,S32 i4_alpha_stim_multiplier,U08 u1_is_cu_noisy,ihevce_me_optimised_function_list_t * ps_me_optimised_function_list)114 void hme_fullpel_cand_sifter(
115     hme_search_prms_t *ps_search_prms,
116     layer_ctxt_t *ps_layer_ctxt,
117     wgt_pred_ctxt_t *ps_wt_inp_prms,
118     S32 i4_alpha_stim_multiplier,
119     U08 u1_is_cu_noisy,
120     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
121 {
122     S32 i4_i;
123     S16 i2_temp_tot_cost, i2_temp_stim_injected_cost, i2_temp_mv_cost, i2_temp_mv_x, i2_temp_mv_y,
124         i2_temp_ref_idx;
125 
126     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
127     S32 i4_temp_part_mask;
128 
129     ps_search_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
130     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
131 
132     if(u1_is_cu_noisy)
133     {
134         i4_temp_part_mask = ps_search_prms->i4_part_mask;
135         ps_search_prms->i4_part_mask &= ((ENABLE_2Nx2N) | (ENABLE_NxN));
136 
137         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
138             (ps_search_prms->i4_part_mask) & ((ENABLE_2Nx2N) | (ENABLE_NxN)),
139             &ps_fullpel_refine_ctxt->ai4_part_id[0]);
140     }
141 
142     ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
143 
144     hme_pred_search(
145         ps_search_prms, ps_layer_ctxt, ps_wt_inp_prms, 0, ps_me_optimised_function_list);
146 
147     if(u1_is_cu_noisy)
148     {
149         if(ps_search_prms->ps_search_results->u1_num_results_per_part == 2)
150         {
151             for(i4_i = 0; i4_i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i4_i++)
152             {
153                 if(ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] >
154                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i])
155                 {
156                     i2_temp_tot_cost = ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i];
157                     i2_temp_stim_injected_cost =
158                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i];
159                     i2_temp_mv_cost = ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i];
160                     i2_temp_mv_x = ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i];
161                     i2_temp_mv_y = ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i];
162                     i2_temp_ref_idx = ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i];
163 
164                     ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] =
165                         ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i];
166                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
167                         ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i];
168                     ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] =
169                         ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i];
170                     ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] =
171                         ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i];
172                     ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] =
173                         ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i];
174                     ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] =
175                         ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i];
176 
177                     ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] = i2_temp_tot_cost;
178                     ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
179                         i2_temp_stim_injected_cost;
180                     ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] = i2_temp_mv_cost;
181                     ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = i2_temp_mv_x;
182                     ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = i2_temp_mv_y;
183                     ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = i2_temp_ref_idx;
184                 }
185             }
186         }
187 
188         ps_search_prms->i4_part_mask = i4_temp_part_mask;
189 
190         ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
191             ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]);
192     }
193 }
194 
/**
********************************************************************************
*  @fn     hme_add_fpel_refine_candidates_to_search_cand_array
*
*  @brief  Appends the points of the rectangular refinement grid centred on one
*          stored fullpel result to the search candidate array.
*
*  @param[out] ps_unique_search_nodes : candidate array that receives the nodes
*  @param[in]  ps_fullpel_refine_ctxt : source of the grid centre (mv and ref idx)
*  @param[in,out] pi4_num_unique_nodes : pi4_num_unique_nodes[0] is the running
*                 node count and the write position in the candidate array
*  @param[in,out] pu4_unique_node_map : handed to INSERT_NEW_NODE (presumably the
*                 map used to detect duplicates - confirm against the macro)
*  @param[in]  i4_fpel_search_result_id : first index into the result arrays
*  @param[in]  i4_fpel_search_result_array_index : second index into the result
*              arrays
*  @param[in]  i4_unique_node_map_center_x : handed to INSERT_NEW_NODE
*  @param[in]  i4_unique_node_map_center_y : handed to INSERT_NEW_NODE
*  @param[in]  i1_unique_node_map_ref_idx : hashing is requested for the
*              off-centre points only when the centre's ref idx equals this
*  @param[in]  u1_add_refine_grid_center_to_search_cand_array : when zero, the
*              grid point with offset (0,0) is skipped
*  @param[in]  u1_do_not_check_for_duplicates : when non-zero, nodes are written
*              straight into the array without going through INSERT_NEW_NODE
*
*  @return None
********************************************************************************
*/
static void hme_add_fpel_refine_candidates_to_search_cand_array(
    search_node_t *ps_unique_search_nodes,
    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt,
    S32 *pi4_num_unique_nodes,
    U32 *pu4_unique_node_map,
    S32 i4_fpel_search_result_id,
    S32 i4_fpel_search_result_array_index,
    S32 i4_unique_node_map_center_x,
    S32 i4_unique_node_map_center_y,
    S08 i1_unique_node_map_ref_idx,
    U08 u1_add_refine_grid_center_to_search_cand_array,
    U08 u1_do_not_check_for_duplicates)
{
    search_node_t s_refine_grid_center;
    U08 u1_use_hashing;
    U08 u1_pt;

    /* Grid centre: motion vector and reference index of the selected result */
    const S32 i4_center_mvx =
        ps_fullpel_refine_ctxt->i2_mv_x[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    const S32 i4_center_mvy =
        ps_fullpel_refine_ctxt->i2_mv_y[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
    const S08 i1_center_ref_idx =
        ps_fullpel_refine_ctxt
            ->i2_ref_idx[i4_fpel_search_result_id][i4_fpel_search_result_array_index];

    if(u1_do_not_check_for_duplicates)
    {
        /* Direct path: every accepted grid point is appended as-is */
        for(u1_pt = 0; u1_pt < NUM_POINTS_IN_RECTANGULAR_GRID; u1_pt++)
        {
            const S08 i1_dx = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][0];
            const S08 i1_dy = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][1];
            const U08 u1_is_center = !(i1_dx || i1_dy);
            search_node_t *ps_dst;

            if(u1_is_center && !u1_add_refine_grid_center_to_search_cand_array)
            {
                continue;
            }

            /* For the centre both offsets are zero, so this stores the centre mv */
            ps_dst = &ps_unique_search_nodes[pi4_num_unique_nodes[0]];
            ps_dst->s_mv.i2_mvx = i4_center_mvx + i1_dx;
            ps_dst->s_mv.i2_mvy = i4_center_mvy + i1_dy;
            ps_dst->i1_ref_idx = i1_center_ref_idx;

            pi4_num_unique_nodes[0]++;
        }

        return;
    }

    /* Checked path: insertion is delegated to INSERT_NEW_NODE */
    s_refine_grid_center.i1_ref_idx = i1_center_ref_idx;

    u1_use_hashing = (s_refine_grid_center.i1_ref_idx == i1_unique_node_map_ref_idx);

    for(u1_pt = 0; u1_pt < NUM_POINTS_IN_RECTANGULAR_GRID; u1_pt++)
    {
        const S08 i1_dx = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][0];
        const S08 i1_dy = gai1_mv_offsets_from_center_in_rect_grid[u1_pt][1];

        /* The scratch node is reused for each grid point */
        s_refine_grid_center.s_mv.i2_mvx = i4_center_mvx + i1_dx;
        s_refine_grid_center.s_mv.i2_mvy = i4_center_mvy + i1_dy;

        if(i1_dx || i1_dy)
        {
            INSERT_NEW_NODE(
                ps_unique_search_nodes,
                pi4_num_unique_nodes[0],
                s_refine_grid_center,
                1,
                pu4_unique_node_map,
                i4_unique_node_map_center_x,
                i4_unique_node_map_center_y,
                u1_use_hashing);
        }
        else if(u1_add_refine_grid_center_to_search_cand_array)
        {
            /* The centre itself is always inserted with the hashing argument 0 */
            INSERT_NEW_NODE(
                ps_unique_search_nodes,
                pi4_num_unique_nodes[0],
                s_refine_grid_center,
                1,
                pu4_unique_node_map,
                i4_unique_node_map_center_x,
                i4_unique_node_map_center_y,
                0);
        }
    }
}
286 
/**
********************************************************************************
*  @fn     hme_fullpel_refine
*
*  @brief  Builds the list of refinement-grid candidates around the stored
*          fullpel results and, if the list is not empty, evaluates it with
*          hme_pred_search.
*
*  @param[in]  ps_refine_prms : supplies u1_max_num_fpel_refine_centers and
*              pu1_num_best_results
*  @param[in,out] ps_search_prms : its search node array is overwritten with the
*                 grid candidates; i4_num_search_nodes is set and u1_is_cu_noisy
*                 is cleared before the search is launched
*  @param[in]  ps_layer_ctxt : forwarded unchanged to hme_pred_search
*  @param[in]  ps_wt_inp_prms : forwarded unchanged to hme_pred_search
*  @param[in,out] pu4_unique_node_map : forwarded to the grid insertion helper
*  @param[in]  u1_num_init_search_cands : >= 2 seeds grids from the per-partition
*              results; exactly 1 seeds a single grid from search node 0
*  @param[in]  u1_8x8_blk_mask : the value 15 selects the limits taken from
*              ps_refine_prms (presumably "all four 8x8 blocks valid" - confirm)
*  @param[in]  i4_unique_node_map_center_x : forwarded to the insertion helper
*  @param[in]  i4_unique_node_map_center_y : forwarded to the insertion helper
*  @param[in]  i1_unique_node_map_ref_idx : forwarded to the insertion helper
*  @param[in]  e_quality_preset : picks the centre limit when the block mask is
*              not 15
*  @param[in]  ps_me_optimised_function_list : forwarded to hme_pred_search
*
*  @return None
********************************************************************************
*/
void hme_fullpel_refine(
    refine_prms_t *ps_refine_prms,
    hme_search_prms_t *ps_search_prms,
    layer_ctxt_t *ps_layer_ctxt,
    wgt_pred_ctxt_t *ps_wt_inp_prms,
    U32 *pu4_unique_node_map,
    U08 u1_num_init_search_cands,
    U08 u1_8x8_blk_mask,
    S32 i4_unique_node_map_center_x,
    S32 i4_unique_node_map_center_y,
    S08 i1_unique_node_map_ref_idx,
    ME_QUALITY_PRESETS_T e_quality_preset,
    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
{
    fullpel_refine_ctxt_t *ps_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
    search_node_t *ps_nodes = ps_search_prms->ps_search_nodes;

    S32 i4_num_nodes = 0;

    /* Grids that contributed all of their off-centre points / at least one point */
    U08 u1_full_grids = 0;
    U08 u1_nonempty_grids = 0;

    if(u1_num_init_search_cands >= 2)
    {
        S32 i4_centers_left;
        S32 i4_part;
        S32 i4_res;
        U08 u1_budget_spent = 0;

        /* Upper bound on the number of results used as grid centres */
        if(15 == u1_8x8_blk_mask)
        {
            i4_centers_left = ps_refine_prms->u1_max_num_fpel_refine_centers;
        }
        else if(ME_XTREME_SPEED_25 == e_quality_preset)
        {
            i4_centers_left = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
        }
        else
        {
            i4_centers_left = INT_MAX;
        }

        /* NOTE(review): the limit is only tested after a result has been */
        /* processed, so the first result of the first partition is always */
        /* examined, even when the limit starts at zero. */
        for(i4_part = 0; (i4_part < ps_ctxt->i4_num_valid_parts) && !u1_budget_spent; i4_part++)
        {
            const S32 i4_part_id = ps_ctxt->ai4_part_id[i4_part];

            /* Result column: the part id when more than 8 parts are valid, */
            /* otherwise the position in the valid part list */
            const S32 i4_col = (ps_ctxt->i4_num_valid_parts > 8) ? i4_part_id : i4_part;

            S32 i4_num_results = ps_search_prms->ps_search_results->u1_num_results_per_part;

            if(15 == u1_8x8_blk_mask)
            {
                i4_num_results =
                    MIN(i4_num_results, ps_refine_prms->pu1_num_best_results[i4_part_id]);
            }

            ASSERT(i4_num_results <= 2);

            for(i4_res = 0; (i4_res < i4_num_results) && !u1_budget_spent; i4_res++)
            {
                /* Only results with a non-negative ref idx and a non-intra mv seed a grid */
                if((ps_ctxt->i2_ref_idx[i4_res][i4_col] >= 0) &&
                   (ps_ctxt->i2_mv_x[i4_res][i4_col] != INTRA_MV))
                {
                    const S32 i4_nodes_before = i4_num_nodes;
                    S32 i4_nodes_added;

                    hme_add_fpel_refine_candidates_to_search_cand_array(
                        ps_nodes,
                        ps_ctxt,
                        &i4_num_nodes,
                        pu4_unique_node_map,
                        i4_res,
                        i4_col,
                        i4_unique_node_map_center_x,
                        i4_unique_node_map_center_y,
                        i1_unique_node_map_ref_idx,
                        0,
                        0);

                    i4_nodes_added = i4_num_nodes - i4_nodes_before;

                    if(i4_nodes_added >= (NUM_POINTS_IN_RECTANGULAR_GRID - 1))
                    {
                        u1_full_grids++;
                    }

                    if(i4_nodes_added)
                    {
                        u1_nonempty_grids++;
                    }

                    i4_centers_left--;
                }

                u1_budget_spent = (i4_centers_left <= 0);
            }

            /* Re-evaluated here so that a partition with no results is covered too */
            u1_budget_spent = (i4_centers_left <= 0);
        }
    }
    else if((1 == u1_num_init_search_cands) && (ps_refine_prms->u1_max_num_fpel_refine_centers >= 1))
    {
        /* Single candidate: search node 0 becomes result [0][0] and the grid centre */
        ps_ctxt->i2_mv_x[0][0] = ps_nodes[0].s_mv.i2_mvx;
        ps_ctxt->i2_mv_y[0][0] = ps_nodes[0].s_mv.i2_mvy;
        ps_ctxt->i2_ref_idx[0][0] = ps_nodes[0].i1_ref_idx;

        if((ps_ctxt->i2_ref_idx[0][0] >= 0) && (ps_ctxt->i2_mv_x[0][0] != INTRA_MV))
        {
            /* Centre included, nodes written without the duplicate check */
            hme_add_fpel_refine_candidates_to_search_cand_array(
                ps_nodes,
                ps_ctxt,
                &i4_num_nodes,
                pu4_unique_node_map,
                0,
                0,
                i4_unique_node_map_center_x,
                i4_unique_node_map_center_y,
                i1_unique_node_map_ref_idx,
                1,
                1);

            /* NOTE(review): u1_nonempty_grids is not updated on this path, so the */
            /* grid flag computed below is always 0 here - confirm this is intended */
            u1_full_grids++;
        }
    }

    if(i4_num_nodes <= 0)
    {
        return;
    }

    ps_search_prms->i4_num_search_nodes = i4_num_nodes;
    ps_search_prms->u1_is_cu_noisy = 0;

    /* Fourth argument is set only when exactly one grid was added and it was complete */
    hme_pred_search(
        ps_search_prms,
        ps_layer_ctxt,
        ps_wt_inp_prms,
        (1 == u1_full_grids) && (u1_nonempty_grids == u1_full_grids),
        ps_me_optimised_function_list);
}
416 
417 /**
418 ********************************************************************************
419 *  @fn     hme_remove_duplicate_fpel_search_candidates
420 *
421 *  @brief  Function name is self-explanatory
422 *
423 *  @return Number of unique candidates
424 ********************************************************************************
425 */
hme_remove_duplicate_fpel_search_candidates(search_node_t * ps_unique_search_nodes,search_candt_t * ps_search_candts,U32 * pu4_unique_node_map,S08 * pi1_pred_dir_to_ref_idx,S32 i4_num_srch_cands,S32 i4_num_init_candts,S32 i4_refine_iter_ctr,S32 i4_num_refinement_iterations,S32 i4_num_act_ref_l0,S08 i1_unique_node_map_ref_idx,S32 i4_unique_node_map_center_x,S32 i4_unique_node_map_center_y,U08 u1_is_bidir_enabled,ME_QUALITY_PRESETS_T e_quality_preset)426 S32 hme_remove_duplicate_fpel_search_candidates(
427     search_node_t *ps_unique_search_nodes,
428     search_candt_t *ps_search_candts,
429     U32 *pu4_unique_node_map,
430     S08 *pi1_pred_dir_to_ref_idx,
431     S32 i4_num_srch_cands,
432     S32 i4_num_init_candts,
433     S32 i4_refine_iter_ctr,
434     S32 i4_num_refinement_iterations,
435     S32 i4_num_act_ref_l0,
436     S08 i1_unique_node_map_ref_idx,
437     S32 i4_unique_node_map_center_x,
438     S32 i4_unique_node_map_center_y,
439     U08 u1_is_bidir_enabled,
440     ME_QUALITY_PRESETS_T e_quality_preset)
441 {
442     S32 i;
443 
444     S32 i4_max_num_cands = ((!u1_is_bidir_enabled) && (i4_num_act_ref_l0 > 1))
445                                ? (i4_num_init_candts >> 1)
446                                : i4_num_init_candts;
447     S32 i4_num_unique_nodes = 0;
448 
449     for(i = 0; (i < i4_num_srch_cands) && (i4_num_unique_nodes < i4_max_num_cands); i++)
450     {
451         search_node_t *ps_cur_cand = ps_search_candts[i].ps_search_node;
452 
453         U08 u1_use_hashing = (ps_cur_cand->i1_ref_idx == i1_unique_node_map_ref_idx);
454 
455         if(i4_num_refinement_iterations > 1)
456         {
457 #if !ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
458             /* Ref0 evaluated during the first iteration */
459             /* All other Ref's evaluated during the second iteration */
460             if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) && (i4_refine_iter_ctr == 0))
461             {
462                 continue;
463             }
464 #else
465             if(e_quality_preset == ME_HIGH_QUALITY)
466             {
467                 if((ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) &&
468                    (i4_refine_iter_ctr == 0))
469                 {
470                     continue;
471                 }
472             }
473             else
474             {
475                 if(ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr])
476                 {
477                     continue;
478                 }
479             }
480 #endif
481         }
482 
483         INSERT_UNIQUE_NODE(
484             ps_unique_search_nodes,
485             i4_num_unique_nodes,
486             ps_cur_cand[0],
487             pu4_unique_node_map,
488             i4_unique_node_map_center_x,
489             i4_unique_node_map_center_y,
490             u1_use_hashing);
491     }
492 
493     return i4_num_unique_nodes;
494 }
495