1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_me_utils_instr_set_router.c
23 *
24 * \brief
25 *    This file contains function pointer initialization of me utility
26 *    functions
27 *
28 * \date
29 *    15/07/2013
30 *
31 * \author
32 *    Ittiam
33 *
34 * List of Functions
35 *  ihevce_me_instr_set_router()
36 *
37 ******************************************************************************
38 */
39 
40 /*****************************************************************************/
41 /* File Includes                                                             */
42 /*****************************************************************************/
43 /* System include files */
44 #include <stdio.h>
45 #include <string.h>
46 #include <assert.h>
47 
48 /* User include files */
49 #include "ihevc_typedefs.h"
50 #include "itt_video_api.h"
51 #include "ihevc_chroma_itrans_recon.h"
52 #include "ihevc_chroma_intra_pred.h"
53 #include "ihevc_debug.h"
54 #include "ihevc_deblk.h"
55 #include "ihevc_defs.h"
56 #include "ihevc_itrans_recon.h"
57 #include "ihevc_intra_pred.h"
58 #include "ihevc_inter_pred.h"
59 #include "ihevc_macros.h"
60 #include "ihevc_mem_fns.h"
61 #include "ihevc_padding.h"
62 #include "ihevc_quant_iquant_ssd.h"
63 #include "ihevc_resi_trans.h"
64 #include "ihevc_sao.h"
65 #include "ihevc_structs.h"
66 #include "ihevc_weighted_pred.h"
67 #include "ihevc_platform_macros.h"
68 
69 #include "rc_cntrl_param.h"
70 #include "rc_frame_info_collector.h"
71 #include "rc_look_ahead_params.h"
72 
73 #include "ihevce_api.h"
74 #include "ihevce_defs.h"
75 #include "ihevce_lap_enc_structs.h"
76 #include "ihevce_multi_thrd_structs.h"
77 #include "ihevce_function_selector.h"
78 #include "ihevce_me_common_defs.h"
79 #include "ihevce_enc_structs.h"
80 #include "ihevce_had_satd.h"
81 #include "ihevce_cmn_utils_instr_set_router.h"
82 
83 #include "hme_datatype.h"
84 #include "hme_common_defs.h"
85 #include "hme_common_utils.h"
86 #include "hme_interface.h"
87 #include "hme_defs.h"
88 #include "hme_err_compute.h"
89 #include "hme_globals.h"
90 
91 #include "ihevce_me_instr_set_router.h"
92 
93 /*****************************************************************************/
94 /* Globals                                                                   */
95 /*****************************************************************************/
96 static FT_SAD_EVALUATOR *gapf_sad_pt_npu[NUM_BLK_SIZES];
97 static FT_PART_SADS_EVALUATOR_16X16CU *gpf_part_sads_evaluator_16x16CU;
98 static FT_PART_SADS_EVALUATOR *gpf_part_sads_evaluator_MxM;
99 static FT_SAD_EVALUATOR *gpf_sad_grid_mxn;
100 /* 9 => Number of function types */
101 /* 2 => Number of results to store */
102 static FT_CALC_SAD_AND_RESULT *gapf_calc_sad_and_result_fxn[9][2];
103 
104 static U08 gau1_calc_sad_and_result[2][2][4][TOT_NUM_PARTS] = {
105     //grid flag = 0
106     { //noise = 0
107       { //NxN or NxN & SMP
108         { 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
109         //SMP only
110         { 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
111         //AMP
112         { 1, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
113         //2Nx2N only, i.e. num_parts = 1
114         { 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
115       //noise = 1
116       { { 5, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
117         { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
118         { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
119         { 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } },
120 
121     //grid flag = 1
122     { //noise = 0
123       { { 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
124         { 0, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
125         { 0, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
126         { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
127       //noise = 1
128       { { 0, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
129         { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
130         { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
131         { 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } }
132 };
133 
134 /*****************************************************************************/
135 /* Function Definitions                                                      */
136 /*****************************************************************************/
137 /*!
138 ******************************************************************************
139 * \if Function name : ihevce_me_instr_set_router \endif
140 *
141 * \brief
142 *    Function pointer initialization of me utils struct
143 *
144 *****************************************************************************
145 */
ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t * ps_func_list,IV_ARCH_T e_arch)146 void ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch)
147 {
148     // clang-format off
149 #ifdef DISABLE_AVX2_INTR
150     e_arch = (e_arch == ARCH_X86_AVX2) ? ARCH_X86_AVX : e_arch;
151 #endif
152 
153     switch(e_arch)
154     {
155 #ifdef ENABLE_NEON
156     case ARCH_ARM_A9Q:
157     case ARCH_ARM_V8_NEON:
158         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
159         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
160         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
161         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
162         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
163         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
164         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
165         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
166         ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
167         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
168         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
169         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
170         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
171         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
172         ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
173         ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
174         ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result_neon;
175         ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result_neon;
176         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result_neon;
177         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result_neon;
178         ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result_neon;
179         ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
180         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel_neon;
181         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel_neon;
182         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel_neon;
183         ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel_neon;
184         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality_neon;
185         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed_neon;
186         ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk_neon;
187         ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN_neon;
188         ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk_neon;
189         ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
190         ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
191         ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
192         ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
193         ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
194         ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
195         ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
196         ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
197         ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
198         ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp_8x8_neon;
199         ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp_ctb_neon;
200         ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
201         ps_func_list->pf_mv_clipper = hme_mv_clipper;
202         ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt_neon;
203         ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse_neon;
204         ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse_neon;
205         ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg_neon;
206         ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality_neon;
207         ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed_neon;
208         break;
209 #endif
210     default:
211         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
212         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
213         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
214         ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
215         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
216         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
217         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
218         ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
219         ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
220         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
221         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
222         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
223         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
224         ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
225         ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
226         ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
227         ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result;
228         ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result;
229         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result;
230         ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result;
231         ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result;
232         ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
233         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel;
234         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel;
235         ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel;
236         ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel;
237         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality;
238         ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed;
239         ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk;
240         ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN;
241         ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk;
242         ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit;
243         ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit;
244         ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
245         ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit;
246         ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
247         ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit;
248         ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit;
249         ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit;
250         ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit;
251         ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp;
252         ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp;
253         ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
254         ps_func_list->pf_mv_clipper = hme_mv_clipper;
255         ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt;
256         ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse;
257         ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse;
258         ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg;
259         ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality;
260         ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed;
261         break;
262     }
263 
264     gapf_sad_pt_npu[BLK_4x4] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
265     gapf_sad_pt_npu[BLK_4x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
266     gapf_sad_pt_npu[BLK_8x4] = ps_func_list->pf_evalsad_pt_npu_8x4_8bit;
267     gapf_sad_pt_npu[BLK_8x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
268     gapf_sad_pt_npu[BLK_4x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
269     gapf_sad_pt_npu[BLK_8x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
270     gapf_sad_pt_npu[BLK_12x16] = ps_func_list->pf_evalsad_pt_npu_12x16_8bit;
271     gapf_sad_pt_npu[BLK_16x4] = ps_func_list->pf_evalsad_pt_npu_16x4_8bit;
272     gapf_sad_pt_npu[BLK_16x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
273     gapf_sad_pt_npu[BLK_16x12] = ps_func_list->pf_evalsad_pt_npu_16x12_8bit;
274     gapf_sad_pt_npu[BLK_16x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
275     gapf_sad_pt_npu[BLK_8x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
276     gapf_sad_pt_npu[BLK_16x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
277     gapf_sad_pt_npu[BLK_24x32] = ps_func_list->pf_evalsad_pt_npu_24x32_8bit;
278     gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
279     gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
280     gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
281     gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
282     gapf_sad_pt_npu[BLK_32x24] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
283     gapf_sad_pt_npu[BLK_32x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
284     gapf_sad_pt_npu[BLK_16x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
285     gapf_sad_pt_npu[BLK_32x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
286     gapf_sad_pt_npu[BLK_48x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
287     gapf_sad_pt_npu[BLK_64x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
288     gapf_sad_pt_npu[BLK_64x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
289     gapf_sad_pt_npu[BLK_64x48] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
290     gapf_sad_pt_npu[BLK_64x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
291 
292     gpf_part_sads_evaluator_16x16CU = ps_func_list->pf_compute_4x4_sads_for_16x16_blk;
293     gpf_part_sads_evaluator_MxM = ps_func_list->pf_evalsad_grid_pu_MxM;
294 
295     gpf_sad_grid_mxn = ps_func_list->pf_evalsad_grid_npu_MxN;
296 
297     gapf_calc_sad_and_result_fxn[0][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid;
298     gapf_calc_sad_and_result_fxn[1][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1;
299     gapf_calc_sad_and_result_fxn[2][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts;
300     gapf_calc_sad_and_result_fxn[3][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9;
301     gapf_calc_sad_and_result_fxn[4][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17;
302     gapf_calc_sad_and_result_fxn[5][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1;
303     gapf_calc_sad_and_result_fxn[6][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts;
304     gapf_calc_sad_and_result_fxn[7][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9;
305     gapf_calc_sad_and_result_fxn[8][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17;
306     gapf_calc_sad_and_result_fxn[0][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_1_for_grid;
307     gapf_calc_sad_and_result_fxn[1][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_eq_1;
308     gapf_calc_sad_and_result_fxn[2][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_square_parts;
309     gapf_calc_sad_and_result_fxn[3][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_9;
310     gapf_calc_sad_and_result_fxn[4][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_17;
311     gapf_calc_sad_and_result_fxn[5][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1;
312     gapf_calc_sad_and_result_fxn[6][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_square_parts;
313     gapf_calc_sad_and_result_fxn[7][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9;
314     gapf_calc_sad_and_result_fxn[8][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17;
315 }
316 // clang-format on
317 
hme_get_calc_sad_and_result_fxn(S08 i1_grid_flag,U08 u1_is_cu_noisy,S32 i4_part_mask,S32 num_parts,S32 num_results)318 FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn(
319     S08 i1_grid_flag, U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results)
320 {
321     U08 u1_index;
322 
323     ASSERT((1 == num_results) || (2 == num_results));
324 
325     u1_index =
326         gau1_calc_sad_and_result[i1_grid_flag][u1_is_cu_noisy]
327                                 [(!!(i4_part_mask & (ENABLE_SMP | ENABLE_NxN)) &&
328                                   !(i4_part_mask & ENABLE_AMP))
329                                      ? (!!(i4_part_mask & ENABLE_NxN) ? 0 : 1)
330                                      : (!!(i4_part_mask & ENABLE_AMP) ? 2 : 3)][num_parts - 1];
331 
332     return gapf_calc_sad_and_result_fxn[u1_index][2 == num_results];
333 }
334 
/*!
******************************************************************************
* \if Function name : hme_evalsad_grid_pu_MxM \endif
*
* \brief
*    Adapts an err_prms_t request to the partition SAD evaluator registered
*    in gpf_part_sads_evaluator_MxM.
*
* \param[in,out] ps_prms : error computation parameters. The input, reference,
*                          strides, step, grid mask and block width are read;
*                          pi4_sad_grid is handed to the evaluator as
*                          TOT_NUM_PARTS consecutive runs, one per partition.
*
* \return
*    None
*
*****************************************************************************
*/
void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms)
{
    grid_ctxt_t s_grid_ctxt;
    cand_t as_cands[9];

    S32 *api4_part_sads[TOT_NUM_PARTS];

    hme_mv_t s_zero_mv = { 0, 0 };

    /* CU size is derived from the block width */
    CU_SIZE_T e_cu_size = (CU_SIZE_T)(hme_get_range(ps_prms->i4_blk_wd) - 4);

    S32 i4_ref_id = 0;
    S32 i4_num_active = 0;
    S32 i4_bit, i4_part;

    /* Count the grid points enabled in the low 9 bits of the mask */
    for(i4_bit = 0; i4_bit < 9; i4_bit++)
    {
        i4_num_active += ((ps_prms->i4_grid_mask & (1 << i4_bit)) != 0);
    }

    /* Each partition owns a run of i4_num_active entries in the SAD buffer */
    for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
    {
        api4_part_sads[i4_part] = ps_prms->pi4_sad_grid + (i4_part * i4_num_active);
    }

    /* Single grid, same step in both directions, zero mv, reference index 0 */
    s_grid_ctxt.num_grids = 1;
    s_grid_ctxt.ref_buf_stride = ps_prms->i4_ref_stride;
    s_grid_ctxt.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step);
    s_grid_ctxt.ppu1_ref_ptr = &ps_prms->pu1_ref;
    s_grid_ctxt.pi4_grd_mask = &ps_prms->i4_grid_mask;
    s_grid_ctxt.p_mv = &s_zero_mv;
    s_grid_ctxt.p_ref_idx = &i4_ref_id;

    gpf_part_sads_evaluator_MxM(
        &s_grid_ctxt,
        ps_prms->pu1_inp,
        ps_prms->i4_inp_stride,
        (WORD32 **)api4_part_sads,
        as_cands,
        &i4_num_active,
        e_cu_size);
}
379 
hme_get_sad_fxn(BLK_SIZE_T e_blk_size,S32 i4_grid_mask,S32 i4_part_mask)380 PF_SAD_FXN_T hme_get_sad_fxn(BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask)
381 {
382     S32 i4_grid_en = ((i4_grid_mask & 0x1fe) != 0);
383 
384     if(i4_grid_en)
385     {
386         if(i4_part_mask & (i4_part_mask - 1))
387         {
388             if(BLK_16x16 == e_blk_size)
389             {
390                 return hme_evalsad_grid_pu_16x16;
391             }
392             else
393             {
394                 return hme_evalsad_grid_pu_MxM;
395             }
396         }
397         else
398         {
399             return gpf_sad_grid_mxn;
400         }
401     }
402     else
403     {
404         if(i4_part_mask & (i4_part_mask - 1))
405         {
406             if(BLK_16x16 == e_blk_size)
407             {
408                 return hme_evalsad_grid_pu_16x16;
409             }
410             else
411             {
412                 return hme_evalsad_grid_pu_MxM;
413             }
414         }
415         else
416         {
417             return gapf_sad_pt_npu[e_blk_size];
418         }
419     }
420 }
421 
/*!
******************************************************************************
* \if Function name : ihevce_sifter_sad_fxn_assigner \endif
*
* \brief
*    Stores the MxN 8 bit point SAD evaluator for the given architecture in
*    the caller's slot.
*
* \param[out] ppf_evalsad_pt_npu_mxn : receives the function pointer in
*                                      element 0; must not be NULL
* \param[in]  e_arch                 : instruction set to route to
*
* \return
*    None
*
*****************************************************************************
*/
void ihevce_sifter_sad_fxn_assigner(FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch)
{
    FT_SAD_EVALUATOR *pf_selected;

    switch(e_arch)
    {
#ifdef ENABLE_NEON
    case ARCH_ARM_A9Q:
    case ARCH_ARM_V8_NEON:
        pf_selected = hme_evalsad_pt_npu_MxN_8bit_neon;
        break;
#endif

    default:
        /* Every other architecture uses the C implementation */
        pf_selected = hme_evalsad_pt_npu_MxN_8bit;
        break;
    }

    *ppf_evalsad_pt_npu_mxn = pf_selected;
}
438