1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /*****************************************************************************/
22 /* File Includes                                                             */
23 /*****************************************************************************/
24 /* System include files */
25 #include <stdio.h>
26 #include <string.h>
27 #include <stdlib.h>
28 #include <assert.h>
29 #include <stdarg.h>
30 #include <math.h>
31 #include <limits.h>
32 
33 /* User include files */
34 #include "ihevc_typedefs.h"
35 #include "itt_video_api.h"
36 #include "ihevce_api.h"
37 
38 #include "rc_cntrl_param.h"
39 #include "rc_frame_info_collector.h"
40 #include "rc_look_ahead_params.h"
41 
42 #include "ihevc_defs.h"
43 #include "ihevc_structs.h"
44 #include "ihevc_platform_macros.h"
45 #include "ihevc_deblk.h"
46 #include "ihevc_itrans_recon.h"
47 #include "ihevc_chroma_itrans_recon.h"
48 #include "ihevc_chroma_intra_pred.h"
49 #include "ihevc_intra_pred.h"
50 #include "ihevc_inter_pred.h"
51 #include "ihevc_mem_fns.h"
52 #include "ihevc_padding.h"
53 #include "ihevc_weighted_pred.h"
54 #include "ihevc_sao.h"
55 #include "ihevc_resi_trans.h"
56 #include "ihevc_quant_iquant_ssd.h"
57 #include "ihevc_cabac_tables.h"
58 
59 #include "ihevce_defs.h"
60 #include "ihevce_lap_enc_structs.h"
61 #include "ihevce_multi_thrd_structs.h"
62 #include "ihevce_multi_thrd_funcs.h"
63 #include "ihevce_me_common_defs.h"
64 #include "ihevce_had_satd.h"
65 #include "ihevce_error_codes.h"
66 #include "ihevce_bitstream.h"
67 #include "ihevce_cabac.h"
68 #include "ihevce_rdoq_macros.h"
69 #include "ihevce_function_selector.h"
70 #include "ihevce_enc_structs.h"
71 #include "ihevce_entropy_structs.h"
72 #include "ihevce_cmn_utils_instr_set_router.h"
73 #include "ihevce_enc_loop_structs.h"
74 #include "ihevce_bs_compute_ctb.h"
75 #include "ihevce_global_tables.h"
76 #include "ihevce_dep_mngr_interface.h"
77 #include "hme_datatype.h"
78 #include "hme_interface.h"
79 #include "hme_common_defs.h"
80 #include "hme_defs.h"
81 #include "ihevce_me_instr_set_router.h"
82 #include "hme_globals.h"
83 #include "hme_utils.h"
84 #include "hme_coarse.h"
85 #include "hme_refine.h"
86 #include "hme_err_compute.h"
87 #include "hme_common_utils.h"
88 #include "hme_search_algo.h"
89 #include "ihevce_profile.h"
90 
91 /*****************************************************************************/
92 /* Function Definitions                                                      */
93 /*****************************************************************************/
94 
hme_init_globals()95 void hme_init_globals()
96 {
97     GRID_PT_T id;
98     S32 i, j;
99     /*************************************************************************/
100     /* Initialize the lookup table for x offset, y offset, optimized mask    */
101     /* based on grid id. The design is as follows:                           */
102     /*                                                                       */
103     /*     a  b  c  d                                                        */
104     /*    TL  T TR  e                                                        */
105     /*     L  C  R  f                                                        */
106     /*    BL  B BR                                                           */
107     /*                                                                       */
108     /*  IF a non corner pt, like T is the new minima, then we need to        */
109     /*  evaluate only 3 new pts, in this case, a, b, c. So the optimal       */
110     /*  grid mask would reflect this. If a corner pt like TR is the new      */
111     /*  minima, then we need to evaluate 5 new pts, in this case, b, c, d,   */
112     /*  e and f. So the grid mask will have 5 pts enabled.                   */
113     /*************************************************************************/
114 
115     id = PT_C;
116     gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
117     gai1_grid_id_to_x[id] = 0;
118     gai1_grid_id_to_y[id] = 0;
119     gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
120     gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
121 
122     id = PT_L;
123     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
124     gai1_grid_id_to_x[id] = -1;
125     gai1_grid_id_to_y[id] = 0;
126     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
127     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
128 
129     id = PT_R;
130     gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
131     gai1_grid_id_to_x[id] = 1;
132     gai1_grid_id_to_y[id] = 0;
133     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
134     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
135 
136     id = PT_T;
137     gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
138     gai1_grid_id_to_x[id] = 0;
139     gai1_grid_id_to_y[id] = -1;
140     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
141     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
142 
143     id = PT_B;
144     gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
145     gai1_grid_id_to_x[id] = 0;
146     gai1_grid_id_to_y[id] = 1;
147     gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
148     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
149 
150     id = PT_TL;
151     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
152     gai1_grid_id_to_x[id] = -1;
153     gai1_grid_id_to_y[id] = -1;
154     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
155 
156     id = PT_TR;
157     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
158     gai1_grid_id_to_x[id] = 1;
159     gai1_grid_id_to_y[id] = -1;
160     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
161 
162     id = PT_BL;
163     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
164     gai1_grid_id_to_x[id] = -1;
165     gai1_grid_id_to_y[id] = 1;
166     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
167 
168     id = PT_BR;
169     gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
170     gai1_grid_id_to_x[id] = 1;
171     gai1_grid_id_to_y[id] = 1;
172     gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
173 
174     ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
175     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
176     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
177     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
178     ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
179     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
180     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
181     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
182     ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
183     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
184     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
185     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
186     ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
187     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
188     ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
189     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
190     ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
191 
192     ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
193     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
194     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
195     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
196     ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
197     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
198     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
199     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
200     ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
201     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
202     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
203     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
204     ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
205     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
206     ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
207     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
208     ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
209 
210     ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
211     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
212     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
213     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
214     ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
215     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
216     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
217     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
218     ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
219     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
220     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
221     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
222     ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
223     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
224     ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
225     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
226     ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
227 
228     ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
229     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
230     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
231     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
232     ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
233     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
234     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
235     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
236     ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
237     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
238     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
239     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
240     ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
241     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
242     ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
243     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
244     ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
245 
246     gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
247     gau1_num_parts_in_part_type[PRT_2NxN] = 2;
248     gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
249     gau1_num_parts_in_part_type[PRT_NxN] = 4;
250     gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
251     gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
252     gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
253     gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
254 
255     for(i = 0; i < MAX_PART_TYPES; i++)
256         for(j = 0; j < MAX_NUM_PARTS; j++)
257             ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
258 
259     /* 2Nx2N only one partition */
260     ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
261 
262     /* 2NxN 2 partitions */
263     ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
264     ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
265 
266     /* Nx2N 2 partitions */
267     ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
268     ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
269 
270     /* NxN 4 partitions */
271     ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
272     ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
273     ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
274     ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
275 
276     /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
277     ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
278     ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
279 
280     /* AMP 2Nx (3N/2 + N/2) 2 partitions */
281     ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
282     ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
283 
284     /* AMP (N/2 + 3N/2) x 2N 2 partitions */
285     ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
286     ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
287 
288     /* AMP (3N/2 + N/2) x 2N 2 partitions */
289     ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
290     ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
291 
292     /*************************************************************************/
293     /* initialize attributes for each partition id within the cu.            */
294     /*************************************************************************/
295     {
296         part_attr_t *ps_part_attr;
297 
298         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
299         ps_part_attr->u1_x_start = 0;
300         ps_part_attr->u1_y_start = 0;
301         ps_part_attr->u1_x_count = 8;
302         ps_part_attr->u1_y_count = 8;
303 
304         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
305         ps_part_attr->u1_x_start = 0;
306         ps_part_attr->u1_y_start = 0;
307         ps_part_attr->u1_x_count = 8;
308         ps_part_attr->u1_y_count = 4;
309 
310         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
311         ps_part_attr->u1_x_start = 0;
312         ps_part_attr->u1_y_start = 4;
313         ps_part_attr->u1_x_count = 8;
314         ps_part_attr->u1_y_count = 4;
315 
316         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
317         ps_part_attr->u1_x_start = 0;
318         ps_part_attr->u1_y_start = 0;
319         ps_part_attr->u1_x_count = 4;
320         ps_part_attr->u1_y_count = 8;
321 
322         ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
323         ps_part_attr->u1_x_start = 4;
324         ps_part_attr->u1_y_start = 0;
325         ps_part_attr->u1_x_count = 4;
326         ps_part_attr->u1_y_count = 8;
327 
328         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
329         ps_part_attr->u1_x_start = 0;
330         ps_part_attr->u1_y_start = 0;
331         ps_part_attr->u1_x_count = 4;
332         ps_part_attr->u1_y_count = 4;
333 
334         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
335         ps_part_attr->u1_x_start = 4;
336         ps_part_attr->u1_y_start = 0;
337         ps_part_attr->u1_x_count = 4;
338         ps_part_attr->u1_y_count = 4;
339 
340         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
341         ps_part_attr->u1_x_start = 0;
342         ps_part_attr->u1_y_start = 4;
343         ps_part_attr->u1_x_count = 4;
344         ps_part_attr->u1_y_count = 4;
345 
346         ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
347         ps_part_attr->u1_x_start = 4;
348         ps_part_attr->u1_y_start = 4;
349         ps_part_attr->u1_x_count = 4;
350         ps_part_attr->u1_y_count = 4;
351 
352         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
353         ps_part_attr->u1_x_start = 0;
354         ps_part_attr->u1_y_start = 0;
355         ps_part_attr->u1_x_count = 8;
356         ps_part_attr->u1_y_count = 2;
357 
358         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
359         ps_part_attr->u1_x_start = 0;
360         ps_part_attr->u1_y_start = 2;
361         ps_part_attr->u1_x_count = 8;
362         ps_part_attr->u1_y_count = 6;
363 
364         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
365         ps_part_attr->u1_x_start = 0;
366         ps_part_attr->u1_y_start = 0;
367         ps_part_attr->u1_x_count = 8;
368         ps_part_attr->u1_y_count = 6;
369 
370         ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
371         ps_part_attr->u1_x_start = 0;
372         ps_part_attr->u1_y_start = 6;
373         ps_part_attr->u1_x_count = 8;
374         ps_part_attr->u1_y_count = 2;
375 
376         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
377         ps_part_attr->u1_x_start = 0;
378         ps_part_attr->u1_y_start = 0;
379         ps_part_attr->u1_x_count = 2;
380         ps_part_attr->u1_y_count = 8;
381 
382         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
383         ps_part_attr->u1_x_start = 2;
384         ps_part_attr->u1_y_start = 0;
385         ps_part_attr->u1_x_count = 6;
386         ps_part_attr->u1_y_count = 8;
387 
388         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
389         ps_part_attr->u1_x_start = 0;
390         ps_part_attr->u1_y_start = 0;
391         ps_part_attr->u1_x_count = 6;
392         ps_part_attr->u1_y_count = 8;
393 
394         ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
395         ps_part_attr->u1_x_start = 6;
396         ps_part_attr->u1_y_start = 0;
397         ps_part_attr->u1_x_count = 2;
398         ps_part_attr->u1_y_count = 8;
399     }
400     for(i = 0; i < NUM_BLK_SIZES; i++)
401         ge_blk_size_to_cu_size[i] = CU_INVALID;
402 
403     ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
404     ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
405     ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
406     ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
407 
408     /* This is the reverse, given cU size, get blk size */
409     ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
410     ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
411     ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
412     ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
413 
414     gau1_is_vert_part[PRT_2Nx2N] = 0;
415     gau1_is_vert_part[PRT_2NxN] = 0;
416     gau1_is_vert_part[PRT_Nx2N] = 1;
417     gau1_is_vert_part[PRT_NxN] = 1;
418     gau1_is_vert_part[PRT_2NxnU] = 0;
419     gau1_is_vert_part[PRT_2NxnD] = 0;
420     gau1_is_vert_part[PRT_nLx2N] = 1;
421     gau1_is_vert_part[PRT_nRx2N] = 1;
422 
423     /* Initialise the number of best results for the full pell refinement */
424     gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
425     gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
426     gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
427     gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
428     gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
429     gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
430     gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
431     gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
432     gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
433     gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
434     gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
435     gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
436     gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
437     gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
438     gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
439     gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
440     gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
441 
442     gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
443     gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
444     gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
445     gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
446     gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
447     gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
448     gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
449     gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
450     gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
451     gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
452     gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
453     gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
454     gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
455     gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
456     gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
457     gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
458     gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
459 
460     gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
461     gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
462     gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
463     gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
464     gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
465     gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
466     gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
467     gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
468     gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
469     gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
470     gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
471     gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
472     gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
473     gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
474     gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
475     gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
476     gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
477 
478     gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
479     gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
480     gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
481     gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
482     gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
483     gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
484     gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
485     gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
486     gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
487     gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
488     gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
489     gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
490     gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
491     gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
492     gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
493     gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
494     gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
495 
496     gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
497     gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
498     gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
499     gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
500     gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
501     gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
502     gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
503     gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
504     gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
505     gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
506     gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
507     gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
508     gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
509     gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
510     gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
511     gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
512     gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
513 
514     gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
515     gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
516     gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
517     gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
518     gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
519     gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
520     gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
521     gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
522     gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
523     gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
524     gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
525     gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
526     gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
527     gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
528     gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
529     gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
530     gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
531 
532     /* Top right validity for each part id */
533     gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
534     gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
535     gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
536     gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
537     gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
538     gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
539     gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
540     gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
541     gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
542     gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
543     gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
544     gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
545     gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
546     gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
547     gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
548     gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
549     gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
550 
551     /* Bot Left validity for each part id */
552     gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
553     gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
554     gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
555     gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
556     gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
557     gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
558     gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
559     gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
560     gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
561     gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
562     gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
563     gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
564     gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
565     gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
566     gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
567     gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
568     gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
569 
570     /*Part id to part num of this partition id in the CU */
571     gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
572     gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
573     gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
574     gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
575     gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
576     gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
577     gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
578     gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
579     gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
580     gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
581     gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
582     gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
583     gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
584     gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
585     gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
586     gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
587     gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
588 
589     /*Which partition type does this partition id belong to */
590     ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
591     ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
592     ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
593     ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
594     ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
595     ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
596     ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
597     ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
598     ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
599     ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
600     ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
601     ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
602     ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
603     ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
604     ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
605     ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
606     ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
607 
608     /*************************************************************************/
609     /* Set up the bits to be taken up for the part type. This is equally     */
610     /* divided up between the various partitions in the part-type.           */
611     /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as         */
612     /* partition 2Nx2N.                                                      */
613     /*************************************************************************/
614     /* 1 bit for 2Nx2N partition */
615     gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
616 
617     /* 3 bits for symmetric part types, so 1.5 bits per partition */
618     gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
619     gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
620     gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
621     gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
622 
623     /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
624     gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
625     gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
626     gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
627     gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
628 
629     /* 4 bits for AMP so 2 bits per partition */
630     gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
631     gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
632     gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
633     gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
634     gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
635     gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
636     gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
637     gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
638 }
639 
640 /**
641 ********************************************************************************
642 *  @fn     hme_enc_num_alloc()
643 *
644 *  @brief  returns number of memtabs that is required by hme module
645 *
646 *  @return   Number of memtabs required
647 ********************************************************************************
648 */
hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)649 S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
650 {
651     if(i4_num_me_frm_pllel > 1)
652     {
653         return ((S32)MAX_HME_ENC_TOT_MEMTABS);
654     }
655     else
656     {
657         return ((S32)MIN_HME_ENC_TOT_MEMTABS);
658     }
659 }
660 
661 /**
662 ********************************************************************************
663 *  @fn     hme_coarse_num_alloc()
664 *
665 *  @brief  returns number of memtabs that is required by hme module
666 *
667 *  @return   Number of memtabs required
668 ********************************************************************************
669 */
hme_coarse_num_alloc()670 S32 hme_coarse_num_alloc()
671 {
672     return ((S32)HME_COARSE_TOT_MEMTABS);
673 }
674 
675 /**
676 ********************************************************************************
677 *  @fn     hme_coarse_dep_mngr_num_alloc()
678 *
679 *  @brief  returns number of memtabs that is required by Dep Mngr for hme module
680 *
681 *  @return   Number of memtabs required
682 ********************************************************************************
683 */
hme_coarse_dep_mngr_num_alloc()684 WORD32 hme_coarse_dep_mngr_num_alloc()
685 {
686     return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
687 }
688 
hme_validate_init_prms(hme_init_prms_t * ps_prms)689 S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
690 {
691     S32 n_layers = ps_prms->num_simulcast_layers;
692 
693     /* The final layer has got to be a non encode coarse layer */
694     if(n_layers > (MAX_NUM_LAYERS - 1))
695         return (-1);
696 
697     if(n_layers < 1)
698         return (-1);
699 
700     /* Width of the coarsest encode layer got to be >= 2*min_wd where min_Wd */
701     /* represents the min allowed width in any layer. Ditto with ht          */
702     if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
703         return (-1);
704     if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
705         return (-1);
706     if(ps_prms->max_num_ref > MAX_NUM_REF)
707         return (-1);
708     if(ps_prms->max_num_ref < 0)
709         return (-1);
710 
711     return (0);
712 }
/**
********************************************************************************
*  @fn     hme_set_layer_res_attrs()
*
*  @brief  Stores the resolution of a layer in its ctxt; for a non encode
*          layer it also derives the layout of the padded input buffer
*
*  @param[in,out] ps_layer : layer ctxt to be updated
*
*  @param[in] wd, ht : width and height of the layer
*
*  @param[in] disp_wd, disp_ht : display width and height of the layer
*
*  @param[in] u1_enc : non zero for an encode layer
*
*  @return   none
********************************************************************************
*/
void hme_set_layer_res_attrs(
    layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
{
    ps_layer->i4_disp_wd = disp_wd;
    ps_layer->i4_disp_ht = disp_ht;
    ps_layer->i4_wd = wd;
    ps_layer->i4_ht = ht;

    /* Encode layers do not get an input buffer description from here */
    if(0 != u1_enc)
    {
        return;
    }

    /* 16 pixels of padding on every side */
    ps_layer->i4_pad_x_inp = 16;
    ps_layer->i4_pad_y_inp = 16;

    /* Each row holds the picture width, 32 pixels of padding and 4 more */
    ps_layer->i4_inp_stride = wd + 32 + 4;

    /* First picture pixel sits 16 rows down and 16 columns in from the base */
    ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16;
    ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset;
}
729 
730 /**
731 ********************************************************************************
732 *  @fn     hme_coarse_get_layer1_mv_bank_ref_idx_size()
733 *
734 *  @brief  returns the MV bank and ref idx size of Layer 1 (penultimate)
735 *
736 *  @return   none
737 ********************************************************************************
738 */
hme_coarse_get_layer1_mv_bank_ref_idx_size(S32 n_tot_layers,S32 * a_wd,S32 * a_ht,S32 max_num_ref,S32 * pi4_mv_bank_size,S32 * pi4_ref_idx_size)739 void hme_coarse_get_layer1_mv_bank_ref_idx_size(
740     S32 n_tot_layers,
741     S32 *a_wd,
742     S32 *a_ht,
743     S32 max_num_ref,
744     S32 *pi4_mv_bank_size,
745     S32 *pi4_ref_idx_size)
746 {
747     S32 num_blks, num_mvs_per_blk, num_ref;
748     S32 num_cols, num_rows, num_mvs_per_row;
749     S32 is_explicit_store = 1;
750     S32 wd, ht, num_layers_explicit_search;
751     S32 num_results, use_4x4;
752     wd = a_wd[1];
753     ht = a_ht[1];
754 
755     /* Assuming abt 4 layers for 1080p, we do explicit search across all ref */
756     /* frames in all but final layer In final layer, it could be 1/2 */
757     //ps_hme_init_prms->num_layers_explicit_search = 3;
758     num_layers_explicit_search = 3;
759 
760     if(num_layers_explicit_search <= 0)
761         num_layers_explicit_search = n_tot_layers - 1;
762 
763     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
764 
765     /* Possibly implicit search for lower (finer) layers */
766     if(n_tot_layers - 1 > num_layers_explicit_search)
767         is_explicit_store = 0;
768 
769     /* coarsest layer alwasy uses 4x4 blks to store results */
770     if(1 == (n_tot_layers - 1))
771     {
772         /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
773         //ps_hme_init_prms->max_num_results_coarse = 4;
774         //vijay : with new algo in coarseset layer this has to be revisited
775         num_results = 4;
776     }
777     else
778     {
779         /* Every refinement layer stores a max of 2 results per partition */
780         //ps_hme_init_prms->max_num_results = 2;
781         num_results = 2;
782     }
783     use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0);
784 
785     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
786     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
787 
788     if(is_explicit_store)
789         num_ref = max_num_ref;
790     else
791         num_ref = 2;
792 
793     num_blks = num_cols * num_rows;
794     num_mvs_per_blk = num_ref * num_results;
795     num_mvs_per_row = num_mvs_per_blk * num_cols;
796 
797     /* stroe the sizes */
798     *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
799     *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
800 
801     return;
802 }
803 /**
804 ********************************************************************************
805 *  @fn     hme_alloc_init_layer_mv_bank()
806 *
807 *  @brief  memory alloc and init function for MV bank
808 *
809 *  @return   Number of memtabs required
810 ********************************************************************************
811 */
hme_alloc_init_layer_mv_bank(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 is_explicit_store,hme_mv_t ** pps_mv_base,S08 ** pi1_ref_idx_base,S32 * pi4_num_mvs_per_row)812 S32 hme_alloc_init_layer_mv_bank(
813     hme_memtab_t *ps_memtab,
814     S32 max_num_results,
815     S32 max_num_ref,
816     S32 use_4x4,
817     S32 mem_avail,
818     S32 u1_enc,
819     S32 wd,
820     S32 ht,
821     S32 is_explicit_store,
822     hme_mv_t **pps_mv_base,
823     S08 **pi1_ref_idx_base,
824     S32 *pi4_num_mvs_per_row)
825 {
826     S32 count = 0;
827     S32 size;
828     S32 num_blks, num_mvs_per_blk;
829     S32 num_ref;
830     S32 num_cols, num_rows, num_mvs_per_row;
831 
832     if(is_explicit_store)
833         num_ref = max_num_ref;
834     else
835         num_ref = 2;
836 
837     /* MV Bank allocation takes into consideration following */
838     /* number of results per reference x max num refrences is the amount     */
839     /* bufffered up per blk. Numbero f blks in pic deps on the blk size,     */
840     /* which could be either 4x4 or 8x8.                                     */
841     num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
842     num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
843 
844     if(u1_enc)
845     {
846         /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
847         WORD32 num_ctb_cols = ((wd + 63) >> 6);
848         WORD32 num_ctb_rows = ((ht + 63) >> 6);
849 
850         num_cols = (num_ctb_cols << 3) + 2;
851         num_rows = (num_ctb_rows << 3) + 2;
852     }
853     num_blks = num_cols * num_rows;
854     num_mvs_per_blk = num_ref * max_num_results;
855     num_mvs_per_row = num_mvs_per_blk * num_cols;
856 
857     size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
858     if(mem_avail)
859     {
860         /* store this for run time verifications */
861         *pi4_num_mvs_per_row = num_mvs_per_row;
862         ASSERT(ps_memtab[count].size == size);
863         *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
864     }
865     else
866     {
867         ps_memtab[count].size = size;
868         ps_memtab[count].align = 4;
869         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
870     }
871 
872     count++;
873     /* Ref idx takes the same route as mvbase */
874 
875     size = num_blks * num_mvs_per_blk * sizeof(S08);
876     if(mem_avail)
877     {
878         ASSERT(ps_memtab[count].size == size);
879         *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
880     }
881     else
882     {
883         ps_memtab[count].size = size;
884         ps_memtab[count].align = 4;
885         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
886     }
887     count++;
888 
889     return (count);
890 }
891 /**
892 ********************************************************************************
893 *  @fn     hme_alloc_init_layer()
894 *
895 *  @brief  memory alloc and init function
896 *
897 *  @return   Number of memtabs required
898 ********************************************************************************
899 */
hme_alloc_init_layer(hme_memtab_t * ps_memtab,S32 max_num_results,S32 max_num_ref,S32 use_4x4,S32 mem_avail,S32 u1_enc,S32 wd,S32 ht,S32 disp_wd,S32 disp_ht,S32 segment_layer,S32 is_explicit_store,layer_ctxt_t ** pps_layer)900 S32 hme_alloc_init_layer(
901     hme_memtab_t *ps_memtab,
902     S32 max_num_results,
903     S32 max_num_ref,
904     S32 use_4x4,
905     S32 mem_avail,
906     S32 u1_enc,
907     S32 wd,
908     S32 ht,
909     S32 disp_wd,
910     S32 disp_ht,
911     S32 segment_layer,
912     S32 is_explicit_store,
913     layer_ctxt_t **pps_layer)
914 {
915     S32 count = 0;
916     layer_ctxt_t *ps_layer = NULL;
917     S32 size;
918     S32 num_ref;
919 
920     ARG_NOT_USED(segment_layer);
921 
922     if(is_explicit_store)
923         num_ref = max_num_ref;
924     else
925         num_ref = 2;
926 
927     /* We do not store 4x4 results for encoding layers */
928     if(u1_enc)
929         use_4x4 = 0;
930 
931     size = sizeof(layer_ctxt_t);
932     if(mem_avail)
933     {
934         ASSERT(ps_memtab[count].size == size);
935         ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
936         *pps_layer = ps_layer;
937     }
938     else
939     {
940         ps_memtab[count].size = size;
941         ps_memtab[count].align = 8;
942         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
943     }
944 
945     count++;
946 
947     /* Input luma buffer allocated only for non encode case */
948     if(0 == u1_enc)
949     {
950         /* Allocate input with padding of 16 pixels */
951         size = (wd + 32 + 4) * (ht + 32 + 4);
952         if(mem_avail)
953         {
954             ASSERT(ps_memtab[count].size == size);
955             ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
956         }
957         else
958         {
959             ps_memtab[count].size = size;
960             ps_memtab[count].align = 16;
961             ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
962         }
963         count++;
964     }
965 
966     /* Allocate memory or just the layer mvbank strcture. */
967     /* TODO : see if this can be removed by moving it to layer_ctxt */
968     size = sizeof(layer_mv_t);
969 
970     if(mem_avail)
971     {
972         ASSERT(ps_memtab[count].size == size);
973         ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
974     }
975     else
976     {
977         ps_memtab[count].size = size;
978         ps_memtab[count].align = 8;
979         ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
980     }
981 
982     count++;
983 
984     if(mem_avail)
985     {
986         hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
987     }
988 
989     return (count);
990 }
991 
/**
********************************************************************************
*  @fn     hme_alloc_init_search_nodes()
*
*  @brief  Requests (mem_avail == 0) or assigns (mem_avail != 0) the single
*          memtab holding the search nodes of one search result structure
*
*  @param[in,out] ps_search_results : search results whose per ref, per
*                 partition node pointers are set up (used only if mem_avail)
*
*  @param[in,out] ps_memtabs : memtab to be filled or consumed
*
*  @param[in] mem_avail : request/assign flag
*
*  @param[in] max_num_ref : number of references to set up
*
*  @param[in] max_num_results : nodes per partition per reference
*
*  @return   Number of memtabs consumed (always 1)
********************************************************************************
*/
S32 hme_alloc_init_search_nodes(
    search_results_t *ps_search_results,
    hme_memtab_t *ps_memtabs,
    S32 mem_avail,
    S32 max_num_ref,
    S32 max_num_results)
{
    S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;

    if(mem_avail != 0)
    {
        search_node_t *ps_next_node = (search_node_t *)ps_memtabs->pu1_mem;
        S32 ref_id, part_id;

        ASSERT(ps_memtabs->size == size);

        /********************************************************************/
        /* The N best results are stored per partition, per ref. The buffer */
        /* is carved into consecutive runs of max_num_results nodes, one    */
        /* run for every (ref, partition) pair, ref being the outer index.  */
        /********************************************************************/
        for(ref_id = 0; ref_id < max_num_ref; ref_id++)
        {
            for(part_id = 0; part_id < TOT_NUM_PARTS; part_id++)
            {
                ps_search_results->aps_part_results[ref_id][part_id] = ps_next_node;
                ps_next_node += max_num_results;
            }
        }
    }
    else
    {
        /* Request phase: only describe the memory that is needed */
        ps_memtabs->size = size;
        ps_memtabs->align = 4;
        ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
    }

    return (1);
}
1027 
/**
********************************************************************************
*  @fn     hme_derive_num_layers()
*
*  @brief  Derives the total number of HME layers and fills in the size of
*          every layer below the encode layers
*
*  @param[in] n_enc_layers : number of encode (simulcast) layers
*
*  @param[in,out] p_wd : layer widths; the first n_enc_layers entries are
*                 inputs, the remaining ones are derived here
*
*  @param[in,out] p_ht : layer heights; same convention as p_wd
*
*  @param[out] p_disp_wd : display width of every layer
*
*  @param[out] p_disp_ht : display height of every layer
*
*  @return   Total number of layers (encode + non encode)
********************************************************************************
*/
S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
{
    S32 i;
    /* We keep downscaling by 2 till we hit one of the conditions:           */
    /* 1. MAX_NUM_LAYERS reached.                                            */
    /* 2. Width or ht goes below min width and ht allowed at coarsest layer  */
    ASSERT(n_enc_layers < MAX_NUM_LAYERS);
    ASSERT(n_enc_layers > 0);
    ASSERT(p_wd[0] <= HME_MAX_WIDTH);
    ASSERT(p_ht[0] <= HME_MAX_HEIGHT);

    p_disp_wd[0] = p_wd[0];
    p_disp_ht[0] = p_ht[0];
    /*************************************************************************/
    /* Verify that for simulcast, lower layer to higher layer ratio is bet   */
    /* 2 (dyadic) and 1.33. Typically it should be 1.5.                      */
    /* TODO : for interlace, we may choose to have additional downscaling for*/
    /* width alone in coarsest layer to next layer.                          */
    /*************************************************************************/
    for(i = 1; i < n_enc_layers; i++)
    {
        S32 wd1, wd2, ht1, ht2;
        wd1 = FLOOR16(p_wd[i - 1] >> 1);
        wd2 = CEIL16((p_wd[i - 1] * 3) >> 2);
        ASSERT(p_wd[i] >= wd1);
        ASSERT(p_wd[i] <= wd2);
        ht1 = FLOOR16(p_ht[i - 1] >> 1);
        ht2 = CEIL16((p_ht[i - 1] * 3) >> 2);
        ASSERT(p_ht[i] >= ht1);
        ASSERT(p_ht[i] <= ht2);

        /* As for layer 0, the display size of an encode layer is its own   */
        /* size. Without this, the first derived layer below would halve an */
        /* entry that was never written when n_enc_layers > 1.              */
        p_disp_wd[i] = p_wd[i];
        p_disp_ht[i] = p_ht[i];
    }
    ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
    ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);

    for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++)
    {
        /* Stop once the previous layer is too small to be halved again */
        if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE))
        {
            return (i);
        }
        /* Use CEIL16 to facilitate 16x16 searches in future, or to do       */
        /* segmentation study in future                                      */
        p_wd[i] = CEIL16(p_wd[i - 1] >> 1);
        p_ht[i] = CEIL16(p_ht[i - 1] >> 1);

        p_disp_wd[i] = p_disp_wd[i - 1] >> 1;
        p_disp_ht[i] = p_disp_ht[i - 1] >> 1;
    }
    return (i);
}
1078 
1079 /**
1080 ********************************************************************************
1081 *  @fn     hme_get_mv_blk_size()
1082 *
1083 *  @brief  returns whether blk uses 4x4 size or something else.
1084 *
1085 *  @param[in] enable_4x4 : input param from application to enable 4x4
1086 *
1087 *  @param[in] layer_id : id of current layer (0 finest)
1088 *
1089 *  @param[in] num_layeers : total num layers
1090 *
1091 *  @param[in] is_enc : Whether encoding enabled for layer
1092 *
1093 *  @return   1 for 4x4 blks, 0 for 8x8
1094 ********************************************************************************
1095 */
hme_get_mv_blk_size(S32 enable_4x4,S32 layer_id,S32 num_layers,S32 is_enc)1096 S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
1097 {
1098     S32 use_4x4 = enable_4x4;
1099 
1100     if((layer_id <= 1) && (num_layers >= 4))
1101         use_4x4 = USE_4x4_IN_L1;
1102     if(layer_id == num_layers - 1)
1103         use_4x4 = 1;
1104     if(is_enc)
1105         use_4x4 = 0;
1106 
1107     return (use_4x4);
1108 }
1109 
1110 /**
1111 ********************************************************************************
1112 *  @fn     hme_enc_alloc_init_mem()
1113 *
1114 *  @brief  Requests/ assign memory based on mem avail
1115 *
1116 *  @param[in] ps_memtabs : memtab array
1117 *
1118 *  @param[in] ps_prms : init prms
1119 *
1120 *  @param[in] pv_ctxt : ME ctxt
1121 *
1122 *  @param[in] mem_avail : request/assign flag
1123 *
1124 *  @return   1 for 4x4 blks, 0 for 8x8
1125 ********************************************************************************
1126 */
hme_enc_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail,S32 i4_num_me_frm_pllel)1127 S32 hme_enc_alloc_init_mem(
1128     hme_memtab_t *ps_memtabs,
1129     hme_init_prms_t *ps_prms,
1130     void *pv_ctxt,
1131     S32 mem_avail,
1132     S32 i4_num_me_frm_pllel)
1133 {
1134     me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
1135     me_ctxt_t *ps_ctxt;
1136     S32 count = 0, size, i, j, use_4x4;
1137     S32 n_tot_layers, n_enc_layers;
1138     S32 num_layers_explicit_search;
1139     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1140     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1141     S32 num_results;
1142     S32 num_thrds;
1143     S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1144 
1145     /* MV bank changes */
1146     hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1147     S32 i4_num_mvs_per_row = 0;
1148     S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
1149 
1150     n_enc_layers = ps_prms->num_simulcast_layers;
1151 
1152     /* Memtab 0: handle */
1153     size = sizeof(me_master_ctxt_t);
1154     if(mem_avail)
1155     {
1156         /* store the number of processing threads */
1157         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1158     }
1159     else
1160     {
1161         ps_memtabs[count].size = size;
1162         ps_memtabs[count].align = 8;
1163         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1164     }
1165 
1166     count++;
1167 
1168     /* Memtab 1: ME threads ctxt */
1169     size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
1170     if(mem_avail)
1171     {
1172         me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
1173 
1174         /* store the indivisual thread ctxt pointers */
1175         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1176         {
1177             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1178         }
1179     }
1180     else
1181     {
1182         ps_memtabs[count].size = size;
1183         ps_memtabs[count].align = 8;
1184         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1185     }
1186 
1187     count++;
1188 
1189     /* Memtab 2: ME frame ctxts */
1190     size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
1191     if(mem_avail)
1192     {
1193         me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
1194 
1195         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1196         {
1197             /* store the indivisual thread ctxt pointers */
1198             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1199             {
1200                 ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
1201 
1202                 ps_me_frm_tmp_ctxt++;
1203             }
1204         }
1205     }
1206     else
1207     {
1208         ps_memtabs[count].size = size;
1209         ps_memtabs[count].align = 8;
1210         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1211     }
1212 
1213     count++;
1214 
1215     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1216     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1217     /*************************************************************************/
1218     /* Derive the number of HME layers, including both encoded and non encode*/
1219     /* This function also derives the width and ht of each layer.            */
1220     /*************************************************************************/
1221     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
1222     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1223     if(num_layers_explicit_search <= 0)
1224         num_layers_explicit_search = n_tot_layers - 1;
1225 
1226     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1227 
1228     if(mem_avail)
1229     {
1230         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1231         {
1232             me_frm_ctxt_t *ps_frm_ctxt;
1233             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1234 
1235             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1236             {
1237                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1238 
1239                 memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
1240                 memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
1241 
1242                 /* only one enocde layer is used */
1243                 ps_frm_ctxt->num_layers = 1;
1244 
1245                 ps_frm_ctxt->i4_wd = a_wd[0];
1246                 ps_frm_ctxt->i4_ht = a_ht[0];
1247                 /*
1248             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
1249             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
1250 */
1251                 ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1252                 ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
1253                 ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1254                 ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
1255             }
1256         }
1257     }
1258 
1259     /* Memtabs : Layers MV bank for encode layer */
1260     /* Each ref_desr in master ctxt will have seperate layer ctxt */
1261 
1262     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1263     {
1264         for(j = 0; j < 1; j++)
1265         {
1266             S32 is_explicit_store = 1;
1267             S32 wd, ht;
1268             U08 u1_enc = 1;
1269             wd = a_wd[j];
1270             ht = a_ht[j];
1271 
1272             /* Possibly implicit search for lower (finer) layers */
1273             if(n_tot_layers - j > num_layers_explicit_search)
1274                 is_explicit_store = 0;
1275 
1276             /* Even if explicit search, we store only 2 results (L0 and L1) */
1277             /* in finest layer */
1278             if(j == 0)
1279             {
1280                 is_explicit_store = 0;
1281             }
1282 
1283             /* coarsest layer alwasy uses 4x4 blks to store results */
1284             if(j == n_tot_layers - 1)
1285             {
1286                 num_results = ps_prms->max_num_results_coarse;
1287             }
1288             else
1289             {
1290                 num_results = ps_prms->max_num_results;
1291                 if(j == 0)
1292                     num_results = 1;
1293             }
1294             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1295 
1296             count += hme_alloc_init_layer_mv_bank(
1297                 &ps_memtabs[count],
1298                 num_results,
1299                 ps_prms->max_num_ref,
1300                 use_4x4,
1301                 mem_avail,
1302                 u1_enc,
1303                 wd,
1304                 ht,
1305                 is_explicit_store,
1306                 &aps_mv_bank[i],
1307                 &api1_ref_idx[i],
1308                 &i4_num_mvs_per_row);
1309         }
1310     }
1311 
1312     /* Memtabs : Layers * num-ref + 1 */
1313     for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
1314     {
1315         /* layer memory allocated only for enocde layer */
1316         for(j = 0; j < 1; j++)
1317         {
1318             layer_ctxt_t *ps_layer;
1319             S32 is_explicit_store = 1;
1320             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1321             S32 wd, ht;
1322             U08 u1_enc = 1;
1323             wd = a_wd[j];
1324             ht = a_ht[j];
1325 
1326             /* Possibly implicit search for lower (finer) layers */
1327             if(n_tot_layers - j > num_layers_explicit_search)
1328                 is_explicit_store = 0;
1329 
1330             /* Even if explicit search, we store only 2 results (L0 and L1) */
1331             /* in finest layer */
1332             if(j == 0)
1333             {
1334                 is_explicit_store = 0;
1335             }
1336 
1337             /* coarsest layer alwasy uses 4x4 blks to store results */
1338             if(j == n_tot_layers - 1)
1339             {
1340                 num_results = ps_prms->max_num_results_coarse;
1341             }
1342             else
1343             {
1344                 num_results = ps_prms->max_num_results;
1345                 if(j == 0)
1346                     num_results = 1;
1347             }
1348             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1349 
1350             count += hme_alloc_init_layer(
1351                 &ps_memtabs[count],
1352                 num_results,
1353                 ps_prms->max_num_ref,
1354                 use_4x4,
1355                 mem_avail,
1356                 u1_enc,
1357                 wd,
1358                 ht,
1359                 a_disp_wd[j],
1360                 a_disp_ht[j],
1361                 segment_this_layer,
1362                 is_explicit_store,
1363                 &ps_layer);
1364             if(mem_avail)
1365             {
1366                 /* same ps_layer memory pointer is stored in all the threads */
1367                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1368                 {
1369                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1370                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1371                 }
1372 
1373                 /* store the MV bank pointers */
1374                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
1375                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
1376                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
1377             }
1378         }
1379     }
1380 
1381     /* Memtabs : Buf Mgr for predictor bufs and working mem */
1382     /* TODO : Parameterise this appropriately */
1383     size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
1384 
1385     if(mem_avail)
1386     {
1387         U08 *pu1_mem = ps_memtabs[count].pu1_mem;
1388 
1389         ASSERT(ps_memtabs[count].size == size);
1390 
1391         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1392         {
1393             me_frm_ctxt_t *ps_frm_ctxt;
1394             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1395 
1396             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1397             {
1398                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1399 
1400                 hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
1401 
1402                 if(i4_num_me_frm_pllel != 1)
1403                 {
1404                     /* update the memory buffer pointer */
1405                     pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1406                 }
1407             }
1408             if(i4_num_me_frm_pllel == 1)
1409             {
1410                 pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
1411             }
1412         }
1413     }
1414     else
1415     {
1416         ps_memtabs[count].size = size;
1417         ps_memtabs[count].align = 4;
1418         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1419     }
1420     count++;
1421 
1422     /*************************************************************************/
1423     /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
1424     /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred  */
1425     /*************************************************************************/
1426     size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
1427            i4_num_me_frm_pllel;
1428 
1429     if(mem_avail)
1430     {
1431         S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
1432 
1433         ASSERT(ps_memtabs[count].size == size);
1434 
1435         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1436         {
1437             me_frm_ctxt_t *ps_frm_ctxt;
1438             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1439 
1440             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1441             {
1442                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1443 
1444                 ps_frm_ctxt->pi2_inp_bck = pi2_mem;
1445                 /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1446                 if(i4_num_me_frm_pllel != 1)
1447                 {
1448                     pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1449                 }
1450             }
1451             if(i4_num_me_frm_pllel == 1)
1452             {
1453                 pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
1454             }
1455         }
1456     }
1457     else
1458     {
1459         ps_memtabs[count].size = size;
1460         ps_memtabs[count].align = 16;
1461         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1462     }
1463 
1464     count++;
1465 
1466     /* Allocate a memtab for each histogram. As many as num ref and number of threads */
1467     /* Loop across for each ME_FRM in PARALLEL */
1468     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1469     {
1470         for(i = 0; i < ps_prms->max_num_ref; i++)
1471         {
1472             size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
1473             if(mem_avail)
1474             {
1475                 mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
1476 
1477                 ASSERT(size == ps_memtabs[count].size);
1478 
1479                 /* divide the memory accross the threads */
1480                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1481                 {
1482                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1483 
1484                     ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
1485                     ps_mv_hist++;
1486                 }
1487             }
1488             else
1489             {
1490                 ps_memtabs[count].size = size;
1491                 ps_memtabs[count].align = 8;
1492                 ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1493             }
1494             count++;
1495         }
1496         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1497         {
1498             /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
1499             /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
1500             count -= ps_prms->max_num_ref;
1501         }
1502     }
1503 
1504     /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
1505     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1506     {
1507         S32 count_cpy = count;
1508         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1509         {
1510             if(mem_avail)
1511             {
1512                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1513             }
1514 
1515             for(i = 0; i < 21; i++)
1516             {
1517                 search_results_t *ps_search_results = NULL;
1518                 if(mem_avail)
1519                 {
1520                     if(i < 16)
1521                     {
1522                         ps_search_results =
1523                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
1524                     }
1525                     else if(i < 20)
1526                     {
1527                         ps_search_results =
1528                             &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
1529                         ps_search_results->ps_cu_results =
1530                             &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
1531                     }
1532                     else if(i == 20)
1533                     {
1534                         ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
1535                         ps_search_results->ps_cu_results =
1536                             &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
1537                     }
1538                     else
1539                     {
1540                         /* 8x8 search results are not required in LO ME */
1541                         ASSERT(0);
1542                     }
1543                 }
1544                 count += hme_alloc_init_search_nodes(
1545                     ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
1546             }
1547         }
1548 
1549         if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
1550         {
1551             count = count_cpy;
1552         }
1553     }
1554 
1555     /* Weighted inputs, one for each ref + one non weighted */
1556     for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
1557     {
1558         size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
1559         if(mem_avail)
1560         {
1561             U08 *pu1_mem;
1562             ASSERT(ps_memtabs[count].size == size);
1563             pu1_mem = ps_memtabs[count].pu1_mem;
1564 
1565             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1566             {
1567                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1568 
1569                 for(i = 0; i < ps_prms->max_num_ref + 1; i++)
1570                 {
1571                     ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
1572                     pu1_mem += (ctb_wd * ctb_wd);
1573                 }
1574             }
1575         }
1576         else
1577         {
1578             ps_memtabs[count].size = size;
1579             ps_memtabs[count].align = 16;
1580             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1581         }
1582         if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
1583         {
1584             count++;
1585         }
1586     }
1587 
1588     /* if memory is allocated the intislaise the frm prms ptr to each thrd */
1589     if(mem_avail)
1590     {
1591         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1592         {
1593             me_frm_ctxt_t *ps_frm_ctxt;
1594             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1595 
1596             for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1597             {
1598                 ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
1599 
1600                 ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
1601                 ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
1602             }
1603         }
1604     }
1605 
1606     /* Memory allocation for use in Clustering */
1607     if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
1608     {
1609         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1610         {
1611             size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
1612                    sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
1613             size *= ps_prms->i4_num_proc_thrds;
1614 
1615             if(mem_avail)
1616             {
1617                 U08 *pu1_mem;
1618 
1619                 ASSERT(ps_memtabs[count].size == size);
1620                 pu1_mem = ps_memtabs[count].pu1_mem;
1621 
1622                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1623                 {
1624                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1625 
1626                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
1627                     pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
1628 
1629                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
1630                     pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
1631 
1632                     ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
1633                     pu1_mem += (sizeof(cluster_64x64_blk_t));
1634 
1635                     ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
1636                         (ctb_cluster_info_t *)pu1_mem;
1637                     pu1_mem += (sizeof(ctb_cluster_info_t));
1638                 }
1639             }
1640             else
1641             {
1642                 ps_memtabs[count].size = size;
1643                 ps_memtabs[count].align = 16;
1644                 ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1645             }
1646 
1647             if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1648             {
1649                 count++;
1650             }
1651         }
1652     }
1653     else if(mem_avail)
1654     {
1655         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1656         {
1657             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1658             {
1659                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1660 
1661                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
1662 
1663                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
1664 
1665                 ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
1666 
1667                 ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
1668             }
1669         }
1670     }
1671 
1672     for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
1673     {
1674         size = sizeof(fullpel_refine_ctxt_t);
1675         size *= ps_prms->i4_num_proc_thrds;
1676 
1677         if(mem_avail)
1678         {
1679             U08 *pu1_mem;
1680 
1681             ASSERT(ps_memtabs[count].size == size);
1682             pu1_mem = ps_memtabs[count].pu1_mem;
1683 
1684             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1685             {
1686                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1687 
1688                 ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
1689                     (fullpel_refine_ctxt_t *)pu1_mem;
1690                 pu1_mem += (sizeof(fullpel_refine_ctxt_t));
1691             }
1692         }
1693         else
1694         {
1695             ps_memtabs[count].size = size;
1696             ps_memtabs[count].align = 16;
1697             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1698         }
1699 
1700         if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
1701         {
1702             count++;
1703         }
1704     }
1705 
1706     /* Memory for ihevce_me_optimised_function_list_t struct  */
1707     if(mem_avail)
1708     {
1709         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
1710     }
1711     else
1712     {
1713         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
1714         ps_memtabs[count].align = 16;
1715         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
1716     }
1717 
1718     ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
1719     return (count);
1720 }
1721 
1722 /**
1723 ********************************************************************************
1724 *  @fn     hme_coarse_alloc_init_mem()
1725 *
1726 *  @brief  Requests/ assign memory based on mem avail
1727 *
1728 *  @param[in] ps_memtabs : memtab array
1729 *
1730 *  @param[in] ps_prms : init prms
1731 *
1732 *  @param[in] pv_ctxt : ME ctxt
1733 *
1734 *  @param[in] mem_avail : request/assign flag
1735 *
1736 *  @return  number of memtabs
1737 ********************************************************************************
1738 */
hme_coarse_alloc_init_mem(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,void * pv_ctxt,S32 mem_avail)1739 S32 hme_coarse_alloc_init_mem(
1740     hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
1741 {
1742     coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
1743     coarse_me_ctxt_t *ps_ctxt;
1744     S32 count = 0, size, i, j, use_4x4, wd;
1745     S32 n_tot_layers;
1746     S32 num_layers_explicit_search;
1747     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
1748     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
1749     S32 num_results;
1750     S32 num_thrds;
1751     //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
1752     S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
1753     S32 layer1_blk_width = 8;  // 8x8 search
1754     S32 blk_shift;
1755 
1756     /* MV bank changes */
1757     hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
1758     S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
1759     S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
1760 
1761     /* Memtab 0: handle */
1762     size = sizeof(coarse_me_master_ctxt_t);
1763     if(mem_avail)
1764     {
1765         /* store the number of processing threads */
1766         ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
1767     }
1768     else
1769     {
1770         ps_memtabs[count].size = size;
1771         ps_memtabs[count].align = 8;
1772         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1773     }
1774 
1775     count++;
1776 
1777     /* Memtab 1: ME threads ctxt */
1778     size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
1779     if(mem_avail)
1780     {
1781         coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
1782 
1783         /* store the indivisual thread ctxt pointers */
1784         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1785         {
1786             ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
1787         }
1788     }
1789     else
1790     {
1791         ps_memtabs[count].size = size;
1792         ps_memtabs[count].align = 8;
1793         ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
1794     }
1795 
1796     count++;
1797 
1798     memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
1799     memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
1800     /*************************************************************************/
1801     /* Derive the number of HME layers, including both encoded and non encode*/
1802     /* This function also derives the width and ht of each layer.            */
1803     /*************************************************************************/
1804     n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
1805 
1806     num_layers_explicit_search = ps_prms->num_layers_explicit_search;
1807 
1808     if(num_layers_explicit_search <= 0)
1809         num_layers_explicit_search = n_tot_layers - 1;
1810 
1811     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
1812 
1813     if(mem_avail)
1814     {
1815         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1816         {
1817             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1818             memset(ps_ctxt->u1_encode, 0, n_tot_layers);
1819 
1820             /* encode layer should be excluded during processing */
1821             ps_ctxt->num_layers = n_tot_layers;
1822 
1823             memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
1824             memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
1825 
1826             ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
1827             ps_ctxt->max_num_results = ps_prms->max_num_results;
1828             ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
1829             ps_ctxt->max_num_ref = ps_prms->max_num_ref;
1830         }
1831     }
1832 
1833     /* Memtabs : Layers MV bank for total layers - 2  */
1834     /* for penultimate layer MV bank will be initialsed at every frame level */
1835     for(j = 1; j < n_tot_layers; j++)
1836     {
1837         S32 is_explicit_store = 1;
1838         S32 wd, ht;
1839         U08 u1_enc = 0;
1840         wd = a_wd[j];
1841         ht = a_ht[j];
1842 
1843         /* Possibly implicit search for lower (finer) layers */
1844         if(n_tot_layers - j > num_layers_explicit_search)
1845             is_explicit_store = 0;
1846 
1847         /* Even if explicit search, we store only 2 results (L0 and L1) */
1848         /* in finest layer */
1849         if(j == 0)
1850         {
1851             is_explicit_store = 0;
1852         }
1853 
1854         /* coarsest layer alwasy uses 4x4 blks to store results */
1855         if(j == n_tot_layers - 1)
1856         {
1857             num_results = ps_prms->max_num_results_coarse;
1858         }
1859         else
1860         {
1861             num_results = ps_prms->max_num_results;
1862             if(j == 0)
1863                 num_results = 1;
1864         }
1865         use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1866 
1867         /* for penultimate compute the parameters and store */
1868         if(j == 1)
1869         {
1870             S32 num_blks, num_mvs_per_blk, num_ref;
1871             S32 num_cols, num_rows, num_mvs_per_row;
1872 
1873             num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
1874             num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
1875 
1876             if(is_explicit_store)
1877                 num_ref = ps_prms->max_num_ref;
1878             else
1879                 num_ref = 2;
1880 
1881             num_blks = num_cols * num_rows;
1882             num_mvs_per_blk = num_ref * num_results;
1883             num_mvs_per_row = num_mvs_per_blk * num_cols;
1884 
1885             ai4_num_mvs_per_row[j] = num_mvs_per_row;
1886             aps_mv_bank[j] = NULL;
1887             api1_ref_idx[j] = NULL;
1888         }
1889         else
1890         {
1891             count += hme_alloc_init_layer_mv_bank(
1892                 &ps_memtabs[count],
1893                 num_results,
1894                 ps_prms->max_num_ref,
1895                 use_4x4,
1896                 mem_avail,
1897                 u1_enc,
1898                 wd,
1899                 ht,
1900                 is_explicit_store,
1901                 &aps_mv_bank[j],
1902                 &api1_ref_idx[j],
1903                 &ai4_num_mvs_per_row[j]);
1904         }
1905     }
1906 
1907     /* Memtabs : Layers * num-ref + 1 */
1908     for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
1909     {
1910         /* for all layer except encode layer */
1911         for(j = 1; j < n_tot_layers; j++)
1912         {
1913             layer_ctxt_t *ps_layer;
1914             S32 is_explicit_store = 1;
1915             S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
1916             S32 wd, ht;
1917             U08 u1_enc = 0;
1918             wd = a_wd[j];
1919             ht = a_ht[j];
1920 
1921             /* Possibly implicit search for lower (finer) layers */
1922             if(n_tot_layers - j > num_layers_explicit_search)
1923                 is_explicit_store = 0;
1924 
1925             /* Even if explicit search, we store only 2 results (L0 and L1) */
1926             /* in finest layer */
1927             if(j == 0)
1928             {
1929                 is_explicit_store = 0;
1930             }
1931 
1932             /* coarsest layer alwasy uses 4x4 blks to store results */
1933             if(j == n_tot_layers - 1)
1934             {
1935                 num_results = ps_prms->max_num_results_coarse;
1936             }
1937             else
1938             {
1939                 num_results = ps_prms->max_num_results;
1940                 if(j == 0)
1941                     num_results = 1;
1942             }
1943             use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
1944 
1945             count += hme_alloc_init_layer(
1946                 &ps_memtabs[count],
1947                 num_results,
1948                 ps_prms->max_num_ref,
1949                 use_4x4,
1950                 mem_avail,
1951                 u1_enc,
1952                 wd,
1953                 ht,
1954                 a_disp_wd[j],
1955                 a_disp_ht[j],
1956                 segment_this_layer,
1957                 is_explicit_store,
1958                 &ps_layer);
1959             if(mem_avail)
1960             {
1961                 /* same ps_layer memory pointer is stored in all the threads */
1962                 for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
1963                 {
1964                     ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
1965                     ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
1966                 }
1967 
1968                 /* store the MV bank pointers */
1969                 ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
1970                 ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
1971                 ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
1972             }
1973         }
1974     }
1975 
1976     /* Memtabs : Prev Row search node at coarsest layer */
1977     wd = a_wd[n_tot_layers - 1];
1978 
1979     /* Allocate a memtab for storing 4x4 SADs for n rows. As many as num ref and number of threads */
1980     num_rows = ps_prms->i4_num_proc_thrds + 1;
1981     if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
1982         search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
1983     else
1984         search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
1985 
1986     /*shift factor*/
1987     blk_shift = 2; /*4x4*/
1988     search_step >>= 1;
1989 
1990     sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
1991                          ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
1992     sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
1993 
1994     size = num_rows * sad_4x4_block_stride * sizeof(S16);
1995     for(i = 0; i < ps_prms->max_num_ref; i++)
1996     {
1997         if(mem_avail)
1998         {
1999             ASSERT(size == ps_memtabs[count].size);
2000 
2001             /* same row memory pointer is stored in all the threads */
2002             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2003             {
2004                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2005                 ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
2006             }
2007         }
2008         else
2009         {
2010             ps_memtabs[count].size = size;
2011             ps_memtabs[count].align = 4;
2012             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2013         }
2014         count++;
2015     }
2016 
2017     /* Allocate a memtab for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2018     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2019     for(i = 0; i < ps_prms->max_num_ref; i++)
2020     {
2021         if(mem_avail)
2022         {
2023             ASSERT(size == ps_memtabs[count].size);
2024 
2025             /* same row memory pointer is stored in all the threads */
2026             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2027             {
2028                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2029                 ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
2030                     (search_node_t *)ps_memtabs[count].pu1_mem;
2031             }
2032         }
2033         else
2034         {
2035             ps_memtabs[count].size = size;
2036             ps_memtabs[count].align = 4;
2037             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2038         }
2039         count++;
2040     }
2041     /* Allocate a memtab for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). As many as num ref and number of threads */
2042     size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
2043     for(i = 0; i < ps_prms->max_num_ref; i++)
2044     {
2045         if(mem_avail)
2046         {
2047             ASSERT(size == ps_memtabs[count].size);
2048 
2049             /* same row memory pointer is stored in all the threads */
2050             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2051             {
2052                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2053                 ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
2054                     (search_node_t *)ps_memtabs[count].pu1_mem;
2055             }
2056         }
2057         else
2058         {
2059             ps_memtabs[count].size = size;
2060             ps_memtabs[count].align = 4;
2061             ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2062         }
2063         count++;
2064     }
2065 
2066     /* Allocate a memtab for each histogram. As many as num ref and number of threads */
2067     for(i = 0; i < ps_prms->max_num_ref; i++)
2068     {
2069         size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
2070         if(mem_avail)
2071         {
2072             mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
2073 
2074             ASSERT(size == ps_memtabs[count].size);
2075 
2076             /* divide the memory accross the threads */
2077             for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2078             {
2079                 ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2080                 ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
2081                 ps_mv_hist++;
2082             }
2083         }
2084         else
2085         {
2086             ps_memtabs[count].size = size;
2087             ps_memtabs[count].align = 8;
2088             ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
2089         }
2090         count++;
2091     }
2092 
2093     /* Memtabs : Search nodes for 8x8 blks */
2094     for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2095     {
2096         search_results_t *ps_search_results = NULL;
2097 
2098         if(mem_avail)
2099         {
2100             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2101         }
2102 
2103         if(mem_avail)
2104         {
2105             ps_search_results = &ps_ctxt->s_search_results_8x8;
2106         }
2107         count += hme_alloc_init_search_nodes(
2108             ps_search_results,
2109             &ps_memtabs[count],
2110             mem_avail,
2111             ps_prms->max_num_ref,
2112             ps_prms->max_num_results);
2113     }
2114 
2115     /* Weighted inputs, one for each ref  */
2116     size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
2117            ps_prms->i4_num_proc_thrds;
2118     if(mem_avail)
2119     {
2120         U08 *pu1_mem;
2121         ASSERT(ps_memtabs[count].size == size);
2122         pu1_mem = ps_memtabs[count].pu1_mem;
2123 
2124         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2125         {
2126             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2127 
2128             for(i = 0; i < ps_prms->max_num_ref + 1; i++)
2129             {
2130                 ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
2131                 pu1_mem += (layer1_blk_width * layer1_blk_width);
2132             }
2133         }
2134     }
2135     else
2136     {
2137         ps_memtabs[count].size = size;
2138         ps_memtabs[count].align = 16;
2139         ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2140     }
2141     count++;
2142 
2143     /* if memory is allocated the intislaise the frm prms ptr to each thrd */
2144     if(mem_avail)
2145     {
2146         for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
2147         {
2148             ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2149 
2150             ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
2151             ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
2152         }
2153     }
2154 
2155     /* Memory for ihevce_me_optimised_function_list_t struct  */
2156     if(mem_avail)
2157     {
2158         ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
2159     }
2160     else
2161     {
2162         ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
2163         ps_memtabs[count].align = 16;
2164         ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
2165     }
2166 
2167     //ASSERT(count < hme_enc_num_alloc());
2168     ASSERT(count < hme_coarse_num_alloc());
2169     return (count);
2170 }
2171 
2172 /*!
2173 ******************************************************************************
2174 * \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
2175 *
2176 * \brief Returns to the caller key attributes relevant for dependency manager,
2177 *        ie, the number of vertical units in each layer
2178 *
2179 * \par Description:
2180 *    This function requires the precondition that the width and ht of encode
2181 *    layer is known.
2182 *    The number of layers, number of vertical units in each layer, and for
2183 *    each vertial unit in each layer, its dependency on previous layer's units
2184 *    From ME's perspective, a vertical unit is one which is smallest min size
2185 *    vertically (and spans the entire row horizontally). This is CTB for encode
2186 *    layer, and 8x8 / 4x4 for non encode layers.
2187 *
2188 * \param[in] num_layers : Number of ME Layers
2189 * \param[in] pai4_ht    : Array storing ht at each layer
2190 * \param[in] pai4_wd    : Array storing wd at each layer
2191 * \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
2192 *                     entry has num vertical units in that particular layer
2193 *
2194 * \return
2195 *    None
2196 *
2197 * \author
2198 *  Ittiam
2199 *
2200 *****************************************************************************
2201 */
ihevce_coarse_me_get_lyr_prms_dep_mngr(WORD32 num_layers,WORD32 * pai4_ht,WORD32 * pai4_wd,WORD32 * pai4_num_vert_units_in_lyr)2202 void ihevce_coarse_me_get_lyr_prms_dep_mngr(
2203     WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
2204 {
2205     /* Height of current and next layers */
2206     WORD32 ht_c, ht_n;
2207     /* Blk ht at a given layer and next layer*/
2208     WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
2209     /* Number of vertical units in current and next layer */
2210     WORD32 num_vert_c, num_vert_n;
2211 
2212     WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i;
2213     UWORD8 au1_encode[MAX_NUM_LAYERS];
2214 
2215     memset(au1_encode, 0, num_layers);
2216     memset(au1_encode, 1, num_enc_layers);
2217 
2218     ht_n = pai4_ht[num_layers - 2];
2219     ht_c = pai4_ht[num_layers - 1];
2220 
2221     /* compute blk ht and unit ht for c and n */
2222     if(au1_encode[num_layers - 1])
2223     {
2224         blk_ht_c = 16;
2225         unit_ht_c = ctb_size;
2226     }
2227     else
2228     {
2229         blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
2230         unit_ht_c = blk_ht_c;
2231     }
2232 
2233     num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
2234     /* For new design in Coarsest HME layer we need */
2235     /* one additional row extra at the end of frame */
2236     /* hence num_vert_c is incremented by 1         */
2237     num_vert_c++;
2238 
2239     /*************************************************************************/
2240     /* Run through each layer, set the number of vertical units              */
2241     /*************************************************************************/
2242     for(i = num_layers - 1; i > 0; i--)
2243     {
2244         pai4_num_vert_units_in_lyr[i] = num_vert_c;
2245 
2246         /* "n" is computed for first time */
2247         ht_n = pai4_ht[i - 1];
2248         blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
2249         unit_ht_n = blk_ht_n;
2250         if(au1_encode[i - 1])
2251             unit_ht_n = ctb_size;
2252 
2253         num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
2254 
2255         /* Compute the blk size and vert unit size in each layer             */
2256         /* "c" denotes curr layer, and "n" denotes the layer to which result */
2257         /* is projected to                                                   */
2258         ht_c = ht_n;
2259         blk_ht_c = blk_ht_n;
2260         unit_ht_c = unit_ht_n;
2261         num_vert_c = num_vert_n;
2262     }
2263 
2264     /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
2265     /* set the numebr of vertical units */
2266     pai4_num_vert_units_in_lyr[0] = num_vert_c;
2267 }
2268 
2269 /**
2270 ********************************************************************************
2271 *  @fn     hme_coarse_dep_mngr_alloc_mem()
2272 *
2273 *  @brief  Requests memory for HME Dep Mngr
2274 *
2275 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2276 * \param[in] ps_init_prms : Create time static parameters
2277 * \param[in] i4_mem_space : memspace in whihc memory request should be done
2278 *
2279 *  @return  number of memtabs
2280 ********************************************************************************
2281 */
hme_coarse_dep_mngr_alloc_mem(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2282 WORD32 hme_coarse_dep_mngr_alloc_mem(
2283     iv_mem_rec_t *ps_mem_tab,
2284     ihevce_static_cfg_params_t *ps_init_prms,
2285     WORD32 i4_mem_space,
2286     WORD32 i4_num_proc_thrds,
2287     WORD32 i4_resolution_id)
2288 {
2289     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2290     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2291     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2292     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2293     WORD32 min_cu_size;
2294 
2295     /* get the min cu size from config params */
2296     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2297 
2298     min_cu_size = 1 << min_cu_size;
2299 
2300     /* Get the width and heights of different decomp layers */
2301     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2302             SET_CTB_ALIGN(
2303                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2304 
2305     *a_ht =
2306         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2307         SET_CTB_ALIGN(
2308             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2309 
2310     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2311     ASSERT(n_tot_layers >= 3);
2312 
2313     /* --- Get the number of vartical units in each layer for dep. mngr -- */
2314     ihevce_coarse_me_get_lyr_prms_dep_mngr(
2315         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2316 
2317     /* Fill memtabs for HME layers,except for L0 layer */
2318     for(i = 1; i < n_tot_layers; i++)
2319     {
2320         n_dep_tabs += ihevce_dmgr_get_mem_recs(
2321             &ps_mem_tab[n_dep_tabs],
2322             DEP_MNGR_ROW_ROW_SYNC,
2323             ai4_num_vert_units_in_lyr[i],
2324             1, /* Number of Col Tiles :  Not supported in PreEnc */
2325             i4_num_proc_thrds,
2326             i4_mem_space);
2327     }
2328 
2329     ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
2330 
2331     return (n_dep_tabs);
2332 }
2333 
2334 /**
2335 ********************************************************************************
2336 *  @fn     hme_coarse_dep_mngr_init()
2337 *
2338 *  @brief  Assign memory for HME Dep Mngr
2339 *
2340 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2341 * \param[in] ps_init_prms : Create time static parameters
2342 *  @param[in] pv_ctxt : ME ctxt
2343 * \param[in] pv_osal_handle : Osal handle
2344 *
2345 *  @return  number of memtabs
2346 ********************************************************************************
2347 */
hme_coarse_dep_mngr_init(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,void * pv_ctxt,void * pv_osal_handle,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2348 WORD32 hme_coarse_dep_mngr_init(
2349     iv_mem_rec_t *ps_mem_tab,
2350     ihevce_static_cfg_params_t *ps_init_prms,
2351     void *pv_ctxt,
2352     void *pv_osal_handle,
2353     WORD32 i4_num_proc_thrds,
2354     WORD32 i4_resolution_id)
2355 {
2356     WORD32 ai4_num_vert_units_in_lyr[MAX_NUM_HME_LAYERS];
2357     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2358     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2359     WORD32 n_enc_layers = 1, n_tot_layers, n_dep_tabs = 0, i;
2360     WORD32 min_cu_size;
2361 
2362     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2363 
2364     /* get the min cu size from config params */
2365     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2366 
2367     min_cu_size = 1 << min_cu_size;
2368 
2369     /* Get the width and heights of different decomp layers */
2370     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2371             SET_CTB_ALIGN(
2372                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2373     *a_ht =
2374         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2375         SET_CTB_ALIGN(
2376             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2377 
2378     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2379     ASSERT(n_tot_layers >= 3);
2380 
2381     /* --- Get the number of vartical units in each layer for dep. mngr -- */
2382     ihevce_coarse_me_get_lyr_prms_dep_mngr(
2383         n_tot_layers, &a_ht[0], &a_wd[0], &ai4_num_vert_units_in_lyr[0]);
2384 
2385     /* --- HME sync Dep Mngr Mem init --    */
2386     for(i = 1; i < n_tot_layers; i++)
2387     {
2388         WORD32 num_blks_in_row, num_blks_in_pic, blk_size_shift;
2389 
2390         if(i == (n_tot_layers - 1)) /* coarsest layer */
2391             blk_size_shift = 2;
2392         else
2393             blk_size_shift = 3; /* refine layers */
2394 
2395         GET_NUM_BLKS_IN_PIC(a_wd[i], a_ht[i], blk_size_shift, num_blks_in_row, num_blks_in_pic);
2396 
2397         /* Coarsest layer : 1 block extra, since the last block */
2398         if(i == (n_tot_layers - 1)) /*  in a row needs East block */
2399             num_blks_in_row += 1;
2400 
2401         /* Note : i-1, only for HME layers, L0 is separate */
2402         ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1] = ihevce_dmgr_init(
2403             &ps_mem_tab[n_dep_tabs],
2404             pv_osal_handle,
2405             DEP_MNGR_ROW_ROW_SYNC,
2406             ai4_num_vert_units_in_lyr[i],
2407             num_blks_in_row,
2408             1, /* Number of Col Tiles : Not supported in PreEnc */
2409             i4_num_proc_thrds,
2410             1 /*Sem disabled*/
2411         );
2412 
2413         n_dep_tabs += ihevce_dmgr_get_num_mem_recs();
2414     }
2415 
2416     return n_dep_tabs;
2417 }
2418 
2419 /**
2420 ********************************************************************************
2421 *  @fn     hme_coarse_dep_mngr_reg_sem()
2422 *
2423 *  @brief  Assign semaphores for HME Dep Mngr
2424 *
2425 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2426 * \param[in] ppv_sem_hdls : Arry of semaphore handles
2427 * \param[in] i4_num_proc_thrds : Number of processing threads
2428 *
2429 *  @return  number of memtabs
2430 ********************************************************************************
2431 */
hme_coarse_dep_mngr_reg_sem(void * pv_ctxt,void ** ppv_sem_hdls,WORD32 i4_num_proc_thrds)2432 void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
2433 {
2434     WORD32 i;
2435     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2436     coarse_me_ctxt_t *ps_ctxt = ps_me_ctxt->aps_me_ctxt[0];
2437 
2438     /* --- HME sync Dep Mngr semaphore init --    */
2439     for(i = 1; i < ps_ctxt->num_layers; i++)
2440     {
2441         ihevce_dmgr_reg_sem_hdls(
2442             ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1], ppv_sem_hdls, i4_num_proc_thrds);
2443     }
2444 
2445     return;
2446 }
2447 
2448 /**
2449 ********************************************************************************
2450 *  @fn     hme_coarse_dep_mngr_delete()
2451 *
2452 *    Destroy Coarse ME Dep Mngr module
2453 *   Note : Only Destroys the resources allocated in the module like
2454 *   semaphore,etc. Memory free is done Separately using memtabs
2455 *
2456 * \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
2457 * \param[in] ps_init_prms : Create time static parameters
2458 *
2459 *  @return  none
2460 ********************************************************************************
2461 */
hme_coarse_dep_mngr_delete(void * pv_me_ctxt,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_resolution_id)2462 void hme_coarse_dep_mngr_delete(
2463     void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
2464 {
2465     WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
2466     WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
2467     WORD32 n_enc_layers = 1, n_tot_layers, i;
2468     WORD32 min_cu_size;
2469 
2470     coarse_me_master_ctxt_t *ps_me_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
2471 
2472     /* get the min cu size from config params */
2473     min_cu_size = ps_init_prms->s_config_prms.i4_min_log2_cu_size;
2474 
2475     min_cu_size = 1 << min_cu_size;
2476 
2477     /* Get the width and heights of different decomp layers */
2478     *a_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
2479             SET_CTB_ALIGN(
2480                 ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, min_cu_size);
2481     *a_ht =
2482         ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
2483         SET_CTB_ALIGN(
2484             ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, min_cu_size);
2485     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2486     ASSERT(n_tot_layers >= 3);
2487 
2488     /* --- HME sync Dep Mngr Delete --    */
2489     for(i = 1; i < n_tot_layers; i++)
2490     {
2491         /* Note : i-1, only for HME layers, L0 is separate */
2492         ihevce_dmgr_del(ps_me_ctxt->apv_dep_mngr_hme_sync[i - 1]);
2493     }
2494 }
2495 
2496 /**
2497 *******************************************************************************
2498 *  @fn     S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2499 *
2500 *  @brief  Fills up memtabs with memory information details required by HME
2501 *
2502 *  @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2503 *              up its requirements of memory
2504 *
2505 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2506 *                       amt of memory
2507 *
2508 *  @return   Number of memtabs required
2509 *******************************************************************************
2510 */
hme_enc_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,WORD32 i4_num_me_frm_pllel)2511 S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
2512 {
2513     S32 num, tot, i;
2514 
2515     /* Validation of init params */
2516     if(-1 == hme_validate_init_prms(ps_prms))
2517         return (-1);
2518 
2519     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
2520     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2521     for(i = num; i < tot; i++)
2522     {
2523         ps_memtabs[i].size = 4;
2524         ps_memtabs[i].align = 4;
2525         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2526     }
2527     return (tot);
2528 }
2529 
2530 /**
2531 *******************************************************************************
2532 *  @fn     S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2533 *
2534 *  @brief  Fills up memtabs with memory information details required by Coarse HME
2535 *
2536 *  @param[out] ps_memtabs : Pointre to an array of memtabs where module fills
2537 *              up its requirements of memory
2538 *
2539 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2540 *                       amt of memory
2541 *
2542 *  @return   Number of memtabs required
2543 *******************************************************************************
2544 */
hme_coarse_alloc(hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2545 S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2546 {
2547     S32 num, tot, i;
2548 
2549     /* Validation of init params */
2550     if(-1 == hme_validate_init_prms(ps_prms))
2551         return (-1);
2552 
2553     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
2554     tot = hme_coarse_num_alloc();
2555     for(i = num; i < tot; i++)
2556     {
2557         ps_memtabs[i].size = 4;
2558         ps_memtabs[i].align = 4;
2559         ps_memtabs[i].e_mem_attr = HME_PERSISTENT_MEM;
2560     }
2561     return (tot);
2562 }
2563 
2564 /**
2565 *******************************************************************************
2566 *  @fn hme_coarse_dep_mngr_alloc
2567 *
2568 *  @brief  Fills up memtabs with memory information details required by Coarse HME
2569 *
2570 * \param[in,out]  ps_mem_tab : pointer to memory descriptors table
2571 * \param[in] ps_init_prms : Create time static parameters
2572 * \param[in] i4_mem_space : memspace in whihc memory request should be done
2573 *
2574 *  @return   Number of memtabs required
2575 *******************************************************************************
2576 */
hme_coarse_dep_mngr_alloc(iv_mem_rec_t * ps_mem_tab,ihevce_static_cfg_params_t * ps_init_prms,WORD32 i4_mem_space,WORD32 i4_num_proc_thrds,WORD32 i4_resolution_id)2577 WORD32 hme_coarse_dep_mngr_alloc(
2578     iv_mem_rec_t *ps_mem_tab,
2579     ihevce_static_cfg_params_t *ps_init_prms,
2580     WORD32 i4_mem_space,
2581     WORD32 i4_num_proc_thrds,
2582     WORD32 i4_resolution_id)
2583 {
2584     S32 num, tot, i;
2585 
2586     num = hme_coarse_dep_mngr_alloc_mem(
2587         ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
2588     tot = hme_coarse_dep_mngr_num_alloc();
2589     for(i = num; i < tot; i++)
2590     {
2591         ps_mem_tab[i].i4_mem_size = 4;
2592         ps_mem_tab[i].i4_mem_alignment = 4;
2593         ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
2594     }
2595     return (tot);
2596 }
2597 
2598 /**
2599 ********************************************************************************
2600 *  @fn     hme_coarse_init_ctxt()
2601 *
2602 *  @brief  initialise context memory
2603 *
2604 *  @param[in] ps_prms : init prms
2605 *
2606 *  @param[in] pv_ctxt : ME ctxt
2607 *
2608 *  @return  number of memtabs
2609 ********************************************************************************
2610 */
hme_coarse_init_ctxt(coarse_me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms)2611 void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
2612 {
2613     S32 i, j, num_thrds;
2614     coarse_me_ctxt_t *ps_ctxt;
2615     S32 num_rows_coarse;
2616 
2617     /* initialise the parameters inot context of all threads */
2618     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2619     {
2620         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2621 
2622         /* Copy the init prms to context */
2623         ps_ctxt->s_init_prms = *ps_prms;
2624 
2625         /* Initialize some other variables in ctxt */
2626         ps_ctxt->i4_prev_poc = -1;
2627 
2628         ps_ctxt->num_b_frms = ps_prms->num_b_frms;
2629 
2630         ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0];
2631         ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0];
2632 
2633         /* Initialize num rows lookuptable */
2634         ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
2635         num_rows_coarse = ps_ctxt->i4_num_row_bufs;
2636         for(i = 0; i < ((HEVCE_MAX_HEIGHT >> 1) >> 2); i++)
2637         {
2638             ps_ctxt->ai4_row_index[i] = (i % num_rows_coarse);
2639         }
2640     }
2641 
2642     /* since same layer desc pointer is stored in all the threads ctxt */
2643     /* layer init is done only using 0th thread ctxt                   */
2644     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2645 
2646     /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2647     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2648     {
2649         for(j = 1; j < ps_ctxt->num_layers; j++)
2650         {
2651             layer_ctxt_t *ps_layer;
2652             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2653             ps_layer->i4_poc = -1;
2654             ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[j][0];
2655             memset(
2656                 ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
2657         }
2658     }
2659 }
2660 
2661 /**
2662 ********************************************************************************
2663 *  @fn     hme_enc_init_ctxt()
2664 *
2665 *  @brief  initialise context memory
2666 *
2667 *  @param[in] ps_prms : init prms
2668 *
2669 *  @param[in] pv_ctxt : ME ctxt
2670 *
2671 *  @return  number of memtabs
2672 ********************************************************************************
2673 */
hme_enc_init_ctxt(me_master_ctxt_t * ps_master_ctxt,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt)2674 void hme_enc_init_ctxt(
2675     me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
2676 {
2677     S32 i, j, num_thrds;
2678     me_ctxt_t *ps_ctxt;
2679     me_frm_ctxt_t *ps_frm_ctxt;
2680 
2681     /* initialise the parameters in context of all threads */
2682     for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
2683     {
2684         ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
2685         /* Store Tile params base into ME context */
2686         ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
2687 
2688         for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
2689         {
2690             ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
2691 
2692             /* Copy the init prms to context */
2693             ps_ctxt->s_init_prms = *ps_prms;
2694 
2695             /* Initialize some other variables in ctxt */
2696             ps_frm_ctxt->i4_prev_poc = INVALID_POC;
2697 
2698             ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
2699 
2700             ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
2701 
2702             ps_frm_ctxt->i4_is_prev_frame_reference = 0;
2703 
2704             ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
2705 
2706             /* Initialize mv grids for L0 and L1 used in final refinement layer */
2707             {
2708                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
2709                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
2710                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
2711                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
2712                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
2713                 hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
2714             }
2715 
2716             ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
2717             ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
2718         }
2719     }
2720 
2721     /* since same layer desc pointer is stored in all the threads ctxt */
2722     /* layer init is done only using 0th thread ctxt                   */
2723     ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
2724 
2725     ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
2726 
2727     /* Initialize all layers descriptors to have -1 = poc meaning unfilled */
2728     for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
2729     {
2730         /* only enocde layer is processed */
2731         for(j = 0; j < 1; j++)
2732         {
2733             layer_ctxt_t *ps_layer;
2734             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2735             ps_layer->i4_poc = INVALID_POC;
2736             ps_layer->i4_is_free = 1;
2737             ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
2738             ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
2739             ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
2740             ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
2741             ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
2742             ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
2743 
2744             memset(
2745                 ps_layer->s_global_mv,
2746                 0,
2747                 sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
2748         }
2749     }
2750 }
2751 
2752 /**
2753 *******************************************************************************
2754 *  @fn     S32 hme_enc_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms,rc_quant_t *ps_rc_quant_ctxt)
2755 *
2756 *  @brief  Initialises the Encode Layer HME ctxt
2757 *
2758 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2759 *              up its requirements of memory
2760 *
2761 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2762 *                       amt of memory
2763 *
2764 *  @return   Number of memtabs required
2765 *******************************************************************************
2766 */
hme_enc_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms,rc_quant_t * ps_rc_quant_ctxt,WORD32 i4_num_me_frm_pllel)2767 S32 hme_enc_init(
2768     void *pv_ctxt,
2769     hme_memtab_t *ps_memtabs,
2770     hme_init_prms_t *ps_prms,
2771     rc_quant_t *ps_rc_quant_ctxt,
2772     WORD32 i4_num_me_frm_pllel)
2773 {
2774     S32 num, tot;
2775     me_master_ctxt_t *ps_ctxt = (me_master_ctxt_t *)pv_ctxt;
2776 
2777     tot = hme_enc_num_alloc(i4_num_me_frm_pllel);
2778     /* Validation of init params */
2779     if(-1 == hme_validate_init_prms(ps_prms))
2780         return (-1);
2781 
2782     num = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
2783     if(num > tot)
2784         return (-1);
2785 
2786     /* Initialize all enumerations based globals */
2787     //hme_init_globals(); /* done as part of coarse me */
2788 
2789     /* Copy the memtabs into the context for returning during free */
2790     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2791 
2792     /* initialize the context and related buffers */
2793     hme_enc_init_ctxt(ps_ctxt, ps_prms, ps_rc_quant_ctxt);
2794     return (0);
2795 }
2796 
2797 /**
2798 *******************************************************************************
2799 *  @fn     S32 hme_coarse_init(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2800 *
2801 *  @brief  Initialises the Coarse HME ctxt
2802 *
2803 *  @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
2804 *              up its requirements of memory
2805 *
2806 *  @param[in] ps_prms : Input parameters to module crucial in calculating reqd
2807 *                       amt of memory
2808 *
2809 *  @return   Number of memtabs required
2810 *******************************************************************************
2811 */
hme_coarse_init(void * pv_ctxt,hme_memtab_t * ps_memtabs,hme_init_prms_t * ps_prms)2812 S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
2813 {
2814     S32 num, tot;
2815     coarse_me_master_ctxt_t *ps_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
2816 
2817     tot = hme_coarse_num_alloc();
2818     /* Validation of init params */
2819     if(-1 == hme_validate_init_prms(ps_prms))
2820         return (-1);
2821 
2822     num = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
2823     if(num > tot)
2824         return (-1);
2825 
2826     /* Initialize all enumerations based globals */
2827     hme_init_globals();
2828 
2829     /* Copy the memtabs into the context for returning during free */
2830     memcpy(ps_ctxt->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * tot);
2831 
2832     /* initialize the context and related buffers */
2833     hme_coarse_init_ctxt(ps_ctxt, ps_prms);
2834 
2835     return (0);
2836 }
2837 
2838 /**
2839 *******************************************************************************
2840 *  @fn     S32 hme_set_resolution(void *pv_me_ctxt,
2841 *                                   S32 n_enc_layers,
2842 *                                   S32 *p_wd,
2843 *                                   S32 *p_ht
2844 *
2845 *  @brief  Sets up the layers based on resolution information.
2846 *
2847 *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2848 *
2849 *  @param[in] n_enc_layers : Number of layers encoded
2850 *
2851 *  @param[in] p_wd : Pointer to an array having widths for each encode layer
2852 *
2853 *  @param[in] p_ht : Pointer to an array having heights for each encode layer
2854 *
2855 *  @return   void
2856 *******************************************************************************
2857 */
2858 
hme_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht,S32 me_frm_id)2859 void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
2860 {
2861     S32 n_tot_layers, num_layers_explicit_search, i, j;
2862     me_ctxt_t *ps_thrd_ctxt;
2863     me_frm_ctxt_t *ps_ctxt;
2864 
2865     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2866     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2867     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2868     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2869 
2870     ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
2871 
2872     ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
2873 
2874     /*************************************************************************/
2875     /* Derive the number of HME layers, including both encoded and non encode*/
2876     /* This function also derives the width and ht of each layer.            */
2877     /*************************************************************************/
2878     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2879     num_layers_explicit_search = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
2880     if(num_layers_explicit_search <= 0)
2881         num_layers_explicit_search = n_tot_layers - 1;
2882 
2883     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2884     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2885     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2886     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2887 
2888     /* only encode layer should be processed */
2889     ps_ctxt->num_layers = n_tot_layers;
2890 
2891     ps_ctxt->i4_wd = a_wd[0];
2892     ps_ctxt->i4_ht = a_ht[0];
2893 
2894     /* Memtabs : Layers * num-ref + 1 */
2895     for(i = 0; i < ps_ctxt->max_num_ref + 1; i++)
2896     {
2897         for(j = 0; j < 1; j++)
2898         {
2899             S32 wd, ht;
2900             layer_ctxt_t *ps_layer;
2901             U08 u1_enc = ps_ctxt->u1_encode[j];
2902             wd = a_wd[j];
2903             ht = a_ht[j];
2904             ps_layer = ps_thrd_ctxt->as_ref_descr[i].aps_layers[j];
2905             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2906         }
2907     }
2908 }
2909 
2910 /**
2911 *******************************************************************************
2912 *  @fn     S32 hme_coarse_set_resolution(void *pv_me_ctxt,
2913 *                                   S32 n_enc_layers,
2914 *                                   S32 *p_wd,
2915 *                                   S32 *p_ht
2916 *
2917 *  @brief  Sets up the layers based on resolution information.
2918 *
2919 *  @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
2920 *
2921 *  @param[in] n_enc_layers : Number of layers encoded
2922 *
2923 *  @param[in] p_wd : Pointer to an array having widths for each encode layer
2924 *
2925 *  @param[in] p_ht : Pointer to an array having heights for each encode layer
2926 *
2927 *  @return   void
2928 *******************************************************************************
2929 */
2930 
hme_coarse_set_resolution(void * pv_me_ctxt,S32 n_enc_layers,S32 * p_wd,S32 * p_ht)2931 void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
2932 {
2933     S32 n_tot_layers, num_layers_explicit_search, i, j;
2934     coarse_me_ctxt_t *ps_ctxt;
2935     S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
2936     S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
2937     memcpy(a_wd, p_wd, n_enc_layers * sizeof(S32));
2938     memcpy(a_ht, p_ht, n_enc_layers * sizeof(S32));
2939 
2940     ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
2941     /*************************************************************************/
2942     /* Derive the number of HME layers, including both encoded and non encode*/
2943     /* This function also derives the width and ht of each layer.            */
2944     /*************************************************************************/
2945     n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
2946     num_layers_explicit_search = ps_ctxt->s_init_prms.num_layers_explicit_search;
2947     if(num_layers_explicit_search <= 0)
2948         num_layers_explicit_search = n_tot_layers - 1;
2949 
2950     num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
2951     ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
2952     memset(ps_ctxt->u1_encode, 0, n_tot_layers);
2953     memset(ps_ctxt->u1_encode, 1, n_enc_layers);
2954 
2955     /* encode layer should be excluded */
2956     ps_ctxt->num_layers = n_tot_layers;
2957 
2958     memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
2959     memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
2960 
2961     /* Memtabs : Layers * num-ref + 1 */
2962     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
2963     {
2964         for(j = 1; j < n_tot_layers; j++)
2965         {
2966             S32 wd, ht;
2967             layer_ctxt_t *ps_layer;
2968             U08 u1_enc = ps_ctxt->u1_encode[j];
2969             wd = a_wd[j];
2970             ht = a_ht[j];
2971             ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
2972             hme_set_layer_res_attrs(ps_layer, wd, ht, a_disp_wd[j], a_disp_ht[j], u1_enc);
2973         }
2974     }
2975 }
2976 
/**
*******************************************************************************
*  @fn     S32 hme_find_descr_idx()
*
*  @brief  Finds the layer descriptor whose layer 0 matches both the given
*          poc and the given idr gop number
*
*  @param[in] ps_ctxt : ME ctxt holding the layer descriptors
*
*  @param[in] i4_poc : poc to look for
*
*  @param[in] i4_idr_gop_num : idr gop number to look for
*
*  @param[in] i4_num_me_frm_pllel : scales the number of descriptors searched
*
*  @return   index of the matching descriptor; asserts and returns -1 if
*            there is none
*******************************************************************************
*/
S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    S32 descr_id = 0;

    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_lyr0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];

        if((ps_lyr0->i4_poc == i4_poc) && (ps_lyr0->i4_idr_gop_num == i4_idr_gop_num))
        {
            return descr_id;
        }
        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return -1;
}
2991 
/**
*******************************************************************************
*  @fn     S32 hme_coarse_find_descr_idx()
*
*  @brief  Finds the layer descriptor whose layer 1 holds the given poc
*
*  @param[in] ps_ctxt : Coarse ME ctxt holding the layer descriptors
*
*  @param[in] i4_poc : poc to look for
*
*  @return   index of the matching descriptor; asserts and returns -1 if
*            there is none
*******************************************************************************
*/
S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
{
    S32 descr_id = 0;

    while(descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
    {
        /* Layer 1 is checked since layer 0 is not handled by coarse ME */
        layer_ctxt_t *ps_lyr1 = ps_ctxt->as_ref_descr[descr_id].aps_layers[1];

        if(ps_lyr1->i4_poc == i4_poc)
        {
            return descr_id;
        }
        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return -1;
}
3005 
/**
*******************************************************************************
*  @fn     S32 hme_find_free_descr_idx()
*
*  @brief  Claims the first layer descriptor whose layer 0 is marked free
*
*          The descriptor found is marked as no longer free before returning.
*
*  @param[in,out] ps_ctxt : ME ctxt holding the layer descriptors
*
*  @param[in] i4_num_me_frm_pllel : scales the number of descriptors searched
*
*  @return   index of the claimed descriptor; asserts and returns -1 if none
*            is free
*******************************************************************************
*/
S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
{
    S32 descr_id = 0;

    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
    {
        layer_ctxt_t *ps_lyr0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];

        if(1 == ps_lyr0->i4_is_free)
        {
            /* Claim it for the caller */
            ps_lyr0->i4_is_free = 0;
            return descr_id;
        }
        descr_id++;
    }

    /* Should not come here */
    ASSERT(0);
    return -1;
}
3022 
hme_coarse_find_free_descr_idx(void * pv_ctxt)3023 S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
3024 {
3025     S32 i;
3026 
3027     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
3028 
3029     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
3030     {
3031         if(ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc == -1)
3032             return i;
3033     }
3034     /* Should not come here */
3035     ASSERT(0);
3036     return (-1);
3037 }
3038 
/**
*******************************************************************************
*  @fn     void hme_discard_frm()
*
*  @brief  Releases the layer descriptors of the pictures to be removed
*
*          For each poc in the list, the matching descriptor's layer 0 is
*          marked free so that it can be reused for a fresh picture.
*
*  @param[in,out] pv_me_ctxt : ME ctxt holding the layer descriptors
*
*  @param[in] p_pocs_to_remove : list of pocs, terminated by INVALID_POC.
*              Only a single entry is expected (asserted below).
*
*  @param[in] i4_idr_gop_num : idr gop number the pocs belong to
*
*  @param[in] i4_num_me_frm_pllel : forwarded to hme_find_descr_idx()
*
*  @return   none
*******************************************************************************
*/
void hme_discard_frm(
    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
    S32 count;

    for(count = 0; p_pocs_to_remove[count] != INVALID_POC; count++)
    {
        S32 idx;

        /* Only one poc per call is supported */
        ASSERT(count == 0);

        /* Search for the id of the layer descriptor that has this poc */
        idx = hme_find_descr_idx(
            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);

        /* hme_find_descr_idx() returns -1 when nothing matches. If its     */
        /* ASSERT is compiled out, indexing with -1 would write outside     */
        /* as_ref_descr, so such an entry is skipped instead.               */
        if(idx < 0)
        {
            continue;
        }

        /*********************************************************************/
        /* Setting i4_is_free = 1 in the encode layer invalidates this layer */
        /* ctxt. Now this can be used for a fresh picture.                   */
        /*********************************************************************/
        ps_ctxt->as_ref_descr[idx].aps_layers[0]->i4_is_free = 1;
    }
}
3064 
/**
 * Invalidates the coarse ME layer descriptors of the pictures listed in
 * p_pocs_to_remove.
 *
 * @param pv_me_ctxt        Coarse ME context (coarse_me_ctxt_t *) passed as
 *                          void *.
 * @param p_pocs_to_remove  List of POCs terminated by -1.
 *
 * NOTE(review): the index returned by hme_coarse_find_descr_idx is used
 * without a range check - confirm that a lookup failure cannot reach this
 * point.
 */
void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
{
    coarse_me_ctxt_t *ps_coarse_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i4_entry = 0;

    while(-1 != p_pocs_to_remove[i4_entry])
    {
        S32 i4_lyr;
        /* Search for the id of the layer descriptor that has this poc */
        S32 i4_descr_idx = hme_coarse_find_descr_idx(ps_coarse_ctxt, p_pocs_to_remove[i4_entry]);
        layers_descr_t *ps_victim = &ps_coarse_ctxt->as_ref_descr[i4_descr_idx];

        /*********************************************************************/
        /* Writing poc = -1 into layers 1 .. num_layers - 1 invalidates the  */
        /* descriptor, so it can be picked up for a fresh picture. Layer 0   */
        /* is not touched here.                                              */
        /*********************************************************************/
        for(i4_lyr = 1; i4_lyr < ps_coarse_ctxt->num_layers; i4_lyr++)
        {
            ps_victim->aps_layers[i4_lyr]->i4_poc = -1;
        }

        i4_entry++;
    }
}
3087 
/**
 * Copies the attributes of a reference picture into a layer descriptor, for
 * layers start_lyr_id .. num_layers - 1.
 *
 * @param ps_layers_desc  Descriptor that receives POC, GOP number, recon
 *                        plane pointers, stride, offset and padding.
 * @param ps_ref_desc     Source description of the reference picture.
 * @param start_lyr_id    First layer to update.
 * @param num_layers      One past the last layer to update.
 * @param ps_curr_desc    Descriptor of the current picture; only its recon
 *                        stride and padding are refreshed.
 */
void hme_update_layer_desc(
    layers_descr_t *ps_layers_desc,
    hme_ref_desc_t *ps_ref_desc,
    S32 start_lyr_id,
    S32 num_layers,
    layers_descr_t *ps_curr_desc)
{
    S32 i4_lyr = start_lyr_id;

    while(i4_lyr < num_layers)
    {
        layer_ctxt_t *ps_ref_lyr = ps_layers_desc->aps_layers[i4_lyr];
        layer_ctxt_t *ps_cur_lyr = ps_curr_desc->aps_layers[i4_lyr];

        /* Identity of the reference picture */
        ps_ref_lyr->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;
        ps_ref_lyr->i4_poc = ps_ref_desc->i4_poc;

        /* Recon planes of the reference picture at this layer */
        ps_ref_lyr->pu1_rec_fxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxfy;
        ps_ref_lyr->pu1_rec_hxfy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxfy;
        ps_ref_lyr->pu1_rec_fxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_fxhy;
        ps_ref_lyr->pu1_rec_hxhy = ps_ref_desc->as_ref_info[i4_lyr].pu1_rec_hxhy;

        /* Recon geometry of the reference picture */
        ps_ref_lyr->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_ref_lyr->i4_rec_offset = ps_ref_desc->as_ref_info[i4_lyr].luma_offset;
        ps_ref_lyr->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_ref_lyr->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        /*********************************************************************/
        /* Stride and padding are assumed to be identical across pictures,   */
        /* hence the current picture gets them as well even though it does   */
        /* not have valid recon pointers yet. The recon offset is not copied */
        /* to the current picture.                                           */
        /*********************************************************************/
        ps_cur_lyr->i4_rec_stride = ps_ref_desc->as_ref_info[i4_lyr].luma_stride;
        ps_cur_lyr->i4_pad_x_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_x;
        ps_cur_lyr->i4_pad_y_rec = ps_ref_desc->as_ref_info[i4_lyr].u1_pad_y;

        i4_lyr++;
    }
}
3127 
/**
 * Registers a new input picture with the ME module (encode layer only).
 *
 * A free layer descriptor is claimed, every processing thread's frame
 * context (for me_frm_id) is pointed at it, and the encode layer of the
 * descriptor is filled from the input description.
 *
 * @param pv_me_ctxt   ME master context (me_master_ctxt_t *) as void *.
 * @param ps_inp_desc  Description of the input picture (POC, IDR GOP number,
 *                     reference flag and per-layer buffer attributes).
 * @param me_frm_id    Index of the ME frame context to set up.
 * @param i4_thrd_id   Thread whose context is used to claim the descriptor
 *                     and to read the encode flags.
 *
 * If no free descriptor is available the function returns without touching
 * any context (hme_find_free_descr_idx has already raised ASSERT(0)).
 */
void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
    me_ctxt_t *ps_thrd_ctxt;
    me_frm_ctxt_t *ps_ctxt;

    hme_inp_buf_attr_t *ps_attr;
    S32 idx, i, i4_prev_poc;
    S32 num_thrds, prev_me_frm_id;

    /* Picture level attributes do not depend on the thread, so they are    */
    /* read once up front. This also guarantees that they hold valid values */
    /* when the per-thread loop below does not execute at all.              */
    const S32 i4_poc = ps_inp_desc->i4_poc;
    const S32 i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
    const S32 i4_is_reference = ps_inp_desc->i4_is_reference;

    /* The descriptor is claimed through the context of thread i4_thrd_id */
    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];

    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

    /* Deriving the previous poc from previous frames context */
    if(me_frm_id == 0)
        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
    else
        prev_me_frm_id = me_frm_id - 1;

    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;

    /* Obtain an empty layer descriptor */
    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
    if(idx < 0)
    {
        /* No free descriptor: indexing as_ref_descr with a negative value */
        /* would access memory outside the array, so bail out instead.     */
        return;
    }
    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        me_frm_ctxt_t *ps_tmp_frm_ctxt;

        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];

        ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
    }

    /* The remaining processing works on ps_desc (claimed above) and on the */
    /* frame context of thread i4_thrd_id.                                   */

    /* only encode layer */
    for(i = 0; i < 1; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
        ps_layer_ctxt->i4_is_reference = i4_is_reference;
        ps_layer_ctxt->i4_non_ref_free = 0;

        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.*/
            S32 wd, dst_stride;

            /* Layer 0 is the only layer visited by this loop, so reaching */
            /* this branch trips the assertion.                            */
            ASSERT(i != 0);

            wd = ps_ctxt->i4_wd;

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }

    return;
}
3219 
/**
 * Registers a new input picture with the coarse ME module (non-encode
 * layers only, i.e. layers 1 .. num_layers - 1).
 *
 * Every processing thread's context is pointed at descriptor i4_curr_idx and
 * has its previous/current POC updated; the non-encode layers of that
 * descriptor are then filled from the input description.
 *
 * @param pv_me_ctxt   Coarse ME master context (coarse_me_master_ctxt_t *)
 *                     passed as void *.
 * @param ps_inp_desc  Description of the input picture.
 * @param i4_curr_idx  Index of the layer descriptor to use; supplied by the
 *                     caller and not validated here.
 */
void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
{
    layers_descr_t *ps_desc;
    layer_ctxt_t *ps_layer_ctxt;
    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
    coarse_me_ctxt_t *ps_ctxt;
    hme_inp_buf_attr_t *ps_attr;
    S32 i;
    S32 num_thrds;

    /* The POC does not depend on the thread, so it is read once up front.  */
    /* This also guarantees a valid value for the layer loop below when the */
    /* per-thread loop does not execute at all.                             */
    const S32 i4_poc = ps_inp_desc->i4_poc;

    /* The descriptor is addressed through the 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];

    /* initialise the parameters for all the threads */
    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
    {
        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
        ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];

        /*Update poc id of previously encoded frm and curr frm */
        ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
        ps_ctxt->i4_curr_poc = i4_poc;
    }

    /* following processing is done using 0th thread ctxt pointer */
    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];

    /* only non encode layer */
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        ps_layer_ctxt = ps_desc->aps_layers[i];
        ps_attr = &ps_inp_desc->s_layer_desc[i];

        ps_layer_ctxt->i4_poc = i4_poc;
        /* If this layer is encoded, copy input attributes */
        if(ps_ctxt->u1_encode[i])
        {
            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
            ps_layer_ctxt->i4_pad_x_inp = 0;
            ps_layer_ctxt->i4_pad_y_inp = 0;
        }
        else
        {
            /* If not encoded, then ME owns the buffer.           */
            /* decomp of lower layers happens on a separate pass  */
            /* Coarse Me should export the pointers to the caller */
            S32 wd, dst_stride;

            ASSERT(i != 0);

            /* Stride is derived from the width of the next finer layer */
            wd = ps_ctxt->a_wd[i - 1];

            /* destination has padding on either side of 16 */
            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
            ps_layer_ctxt->i4_inp_stride = dst_stride;
        }
    }
}
3284 
/**
 * Chooses the number of results kept per partition for a given layer.
 *
 * The count is restricted to 1 when the layer specific
 * RESTRICT_NUM_PARTITION_LEVEL_LxME_RESULTS_TO_1 switch is non-zero and the
 * quality preset belongs to the set listed for that layer. Layer 2 is only
 * considered when there are more than 3 layers. In every other case
 * MAX_RESULTS_PER_PART is used.
 *
 * @param u1_layer_id       Layer for which the count is requested.
 * @param u1_num_layers     Total number of layers.
 * @param e_quality_preset  ME quality preset.
 *
 * @return 1 or MAX_RESULTS_PER_PART.
 */
static __inline U08 hme_determine_num_results_per_part(
    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
{
    S32 i4_single_result = 0;

    switch(u1_layer_id)
    {
    case 0:
    {
        if(!!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
        {
            i4_single_result =
                (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
                (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset) ||
                (ME_HIGH_QUALITY == e_quality_preset) ||
                (ME_PRISTINE_QUALITY == e_quality_preset);
        }

        break;
    }
    case 1:
    {
        if(!!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
        {
            i4_single_result = (ME_XTREME_SPEED_25 == e_quality_preset) ||
                               (ME_HIGH_QUALITY == e_quality_preset) ||
                               (ME_PRISTINE_QUALITY == e_quality_preset);
        }

        break;
    }
    case 2:
    {
        if((u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
        {
            i4_single_result =
                (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
                (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
        }

        break;
    }
    default:
    {
        /* Remaining layers are never restricted */
        break;
    }
    }

    return i4_single_result ? 1 : MAX_RESULTS_PER_PART;
}
3357 
/**
 * Fills, for layer 0 only, the number of full-pel search candidates allowed
 * at each of the NUM_SEARCH_CAND_LOCATIONS candidate locations.
 *
 * @param ps_frm_prms                Frame parameters; supplies the number of
 *                                   active references in L0 and L1.
 * @param pu1_num_fpel_search_cands  Output array with one entry per search
 *                                   candidate location. Left untouched when
 *                                   u1_layer_id is not 0.
 * @param u1_layer_id                Layer being configured.
 * @param e_quality_preset           ME quality preset.
 */
static __inline void hme_max_search_cands_per_search_cand_loc_populator(
    hme_frm_prms_t *ps_frm_prms,
    U08 *pu1_num_fpel_search_cands,
    U08 u1_layer_id,
    ME_QUALITY_PRESETS_T e_quality_preset)
{
    S32 i4_loc;

    if(0 != u1_layer_id)
    {
        return;
    }

    for(i4_loc = 0; i4_loc < NUM_SEARCH_CAND_LOCATIONS; i4_loc++)
    {
#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
        /* Speed oriented presets get a single candidate per location */
        if((ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
           (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset))
        {
            pu1_num_fpel_search_cands[i4_loc] = 1;

            continue;
        }
#endif

        /* At least 2 candidates; otherwise the larger of the two active */
        /* reference counts, doubled for the COLOCATED location.         */
        pu1_num_fpel_search_cands[i4_loc] =
            MAX(2,
                MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
                    ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)i4_loc) + 1));
    }
}
3396 
3397 static __inline U08
hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3398     hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3399 {
3400     U08 u1_num_cands = 2;
3401 
3402     if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
3403     {
3404         switch(e_quality_preset)
3405         {
3406         case ME_XTREME_SPEED_25:
3407         case ME_XTREME_SPEED:
3408         case ME_HIGH_SPEED:
3409         case ME_MEDIUM_SPEED:
3410         {
3411             u1_num_cands = 1;
3412 
3413             break;
3414         }
3415         default:
3416         {
3417             u1_num_cands = 2;
3418 
3419             break;
3420         }
3421         }
3422     }
3423 
3424     return u1_num_cands;
3425 }
3426 
3427 static __inline U08
hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id,ME_QUALITY_PRESETS_T e_quality_preset)3428     hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
3429 {
3430     U08 i;
3431 
3432     U08 u1_num_centers = 0;
3433 
3434     if(0 == u1_layer_id)
3435     {
3436         switch(e_quality_preset)
3437         {
3438         case ME_XTREME_SPEED_25:
3439         {
3440             for(i = 0; i < TOT_NUM_PARTS; i++)
3441             {
3442                 u1_num_centers += gau1_num_best_results_XS25[i];
3443             }
3444 
3445             break;
3446         }
3447         case ME_XTREME_SPEED:
3448         {
3449             for(i = 0; i < TOT_NUM_PARTS; i++)
3450             {
3451                 u1_num_centers += gau1_num_best_results_XS[i];
3452             }
3453 
3454             break;
3455         }
3456         case ME_HIGH_SPEED:
3457         {
3458             for(i = 0; i < TOT_NUM_PARTS; i++)
3459             {
3460                 u1_num_centers += gau1_num_best_results_HS[i];
3461             }
3462 
3463             break;
3464         }
3465         case ME_MEDIUM_SPEED:
3466         {
3467             for(i = 0; i < TOT_NUM_PARTS; i++)
3468             {
3469                 u1_num_centers += gau1_num_best_results_MS[i];
3470             }
3471 
3472             break;
3473         }
3474         case ME_HIGH_QUALITY:
3475         {
3476             for(i = 0; i < TOT_NUM_PARTS; i++)
3477             {
3478                 u1_num_centers += gau1_num_best_results_HQ[i];
3479             }
3480 
3481             break;
3482         }
3483         case ME_PRISTINE_QUALITY:
3484         {
3485             for(i = 0; i < TOT_NUM_PARTS; i++)
3486             {
3487                 u1_num_centers += gau1_num_best_results_PQ[i];
3488             }
3489 
3490             break;
3491         }
3492         }
3493     }
3494 
3495     return u1_num_centers;
3496 }
3497 
/**
 * Computes the maximum number of sub-pel refinement centers.
 *
 * @param u1_layer_id                Layer being configured.
 * @param u1_max_2Nx2N_subpel_cands  Maximum sub-pel candidates for 2Nx2N.
 * @param u1_max_NxN_subpel_cands    Maximum sub-pel candidates for NxN.
 *
 * @return For layer 0: u1_max_2Nx2N_subpel_cands plus four times
 *         u1_max_NxN_subpel_cands, truncated to U08. For any other
 *         layer: 0.
 */
static __inline U08 hme_determine_max_num_subpel_refine_centers(
    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
{
    if(0 != u1_layer_id)
    {
        return 0;
    }

    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
}
3510 
/**
 * Populates the refinement parameters of one HME layer.
 *
 * @param pv_refine_prms              Destination (refine_prms_t *) passed as
 *                                    void *.
 * @param u1_encode                   Non-zero when the layer is an encode
 *                                    layer.
 * @param num_ref                     Number of reference pictures.
 * @param layer_id                    Layer being configured.
 * @param num_layers                  Total number of layers.
 * @param num_layers_explicit_search  Number of layers that use explicit
 *                                    reference search.
 * @param use_4x4                     Forwarded to hme_get_mv_blk_size.
 * @param ps_frm_prms                 Frame level parameters (lambdas, bidir
 *                                    flag, active reference counts, ...).
 * @param ppd_intra_costs             Per layer intra cost pointers; may be
 *                                    NULL. Consulted for encode layers only.
 * @param ps_me_coding_tools          Quality preset and, optionally, command
 *                                    line overrides.
 *
 * Fields that belong to a preset specific group are left untouched when the
 * preset is not one of the six handled presets.
 */
void hme_set_refine_prms(
    void *pv_refine_prms,
    U08 u1_encode,
    S32 num_ref,
    S32 layer_id,
    S32 num_layers,
    S32 num_layers_explicit_search,
    S32 use_4x4,
    hme_frm_prms_t *ps_frm_prms,
    double **ppd_intra_costs,
    me_coding_params_t *ps_me_coding_tools)
{
    refine_prms_t *ps_prms = (refine_prms_t *)pv_refine_prms;

    /* Basic layer attributes */
    ps_prms->i4_layer_id = layer_id;
    ps_prms->i4_encode = u1_encode;
    ps_prms->bidir_enabled = ps_frm_prms->bidir_enabled;

    /*************************************************************************/
    /* Two lambdas are maintained: lambda_recon for closed loop and          */
    /* lambda_inp for open loop. Both are first copied from the frame        */
    /* params and then discounted by ME_LAMBDA_DISCOUNT percent.             */
    /*************************************************************************/
    ps_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;

    ps_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
    ps_prms->lambda_inp = ((float)ps_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;

    ps_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
    ps_prms->lambda_recon =
        ((float)ps_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;

    /* Intra costs are attached only for encode layers, when available */
    if((NULL != ppd_intra_costs) && (u1_encode))
    {
        ps_prms->pd_intra_costs = ppd_intra_costs[layer_id];
    }

    /* Explicit or implicit depends on number of layers having explicit search */
    if((layer_id != 0) && (num_layers - layer_id <= num_layers_explicit_search))
    {
        ps_prms->explicit_ref = 1;
        ps_prms->i4_num_ref_fpel = num_ref;
    }
    else
    {
        ps_prms->explicit_ref = 0;
        ps_prms->i4_num_ref_fpel = MIN(2, num_ref);
    }

    /* Default search complexity; the known presets override it below */
    ps_prms->e_search_complexity = SEARCH_CX_HIGH;

    if(u1_encode)
    {
        /* Sub-pel refinement step counts come from the coding tools */
        ps_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
        ps_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;

        ps_prms->i4_num_steps_fpel_refine = 1;
        ps_prms->i4_use_rec_in_fpel = 1;
        ps_prms->i4_num_mvbank_results = 1;

        switch(ps_me_coding_tools->e_me_quality_presets)
        {
        case ME_PRISTINE_QUALITY:
        {
            ps_prms->i4_num_fpel_results = 4;
            ps_prms->i4_num_32x32_merge_results = 4;
            ps_prms->i4_num_64x64_merge_results = 4;
            ps_prms->i4_num_steps_post_refine_fpel = 3;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_PQ;
            ps_prms->limit_active_partitions = 0;

            break;
        }
        case ME_HIGH_QUALITY:
        {
            ps_prms->i4_num_fpel_results = 4;
            ps_prms->i4_num_32x32_merge_results = 4;
            ps_prms->i4_num_64x64_merge_results = 4;
            ps_prms->i4_num_steps_post_refine_fpel = 3;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 2;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HQ;
            ps_prms->limit_active_partitions = 0;

            break;
        }
        case ME_MEDIUM_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->u1_max_subpel_candts_2Nx2N = 2;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 3;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_MS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_HIGH_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 1;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_XTREME_SPEED:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 0;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        case ME_XTREME_SPEED_25:
        {
            ps_prms->i4_num_fpel_results = 1;
            ps_prms->i4_num_32x32_merge_results = 2;
            ps_prms->i4_num_64x64_merge_results = 2;
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->u1_max_subpel_candts_2Nx2N = 1;
            ps_prms->u1_max_subpel_candts_NxN = 0;
            ps_prms->u1_subpel_candt_threshold = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS25;
            ps_prms->limit_active_partitions = 1;

            break;
        }
        default:
        {
            /* Unhandled preset: preset specific fields are not written */
            break;
        }
        }
    }
    else
    {
        /* Non-encode layers do not perform any sub-pel refinement */
        ps_prms->i4_num_steps_hpel_refine = 0;
        ps_prms->i4_num_steps_qpel_refine = 0;

        ps_prms->i4_num_steps_fpel_refine = 1;
        ps_prms->i4_use_rec_in_fpel = 0;
        ps_prms->i4_num_fpel_results = 2;

        switch(ps_me_coding_tools->e_me_quality_presets)
        {
        case ME_HIGH_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HS;

            break;
        }
        case ME_XTREME_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS;

            break;
        }
        case ME_XTREME_SPEED_25:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 0;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_XS25;

            break;
        }
        case ME_PRISTINE_QUALITY:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 2;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_PQ;

            break;
        }
        case ME_HIGH_QUALITY:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 2;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_MED;
            ps_prms->pu1_num_best_results = gau1_num_best_results_HQ;

            break;
        }
        case ME_MEDIUM_SPEED:
        {
            ps_prms->i4_num_steps_post_refine_fpel = 0;
            ps_prms->i4_use_satd_subpel = 1;
            ps_prms->e_search_complexity = SEARCH_CX_LOW;
            ps_prms->pu1_num_best_results = gau1_num_best_results_MS;

            break;
        }
        default:
        {
            /* Unhandled preset: preset specific fields are not written */
            break;
        }
        }

        /* Following fields unused in the non-encode layers */
        /* But setting the same to default values           */
        ps_prms->i4_num_32x32_merge_results = 4;
        ps_prms->i4_num_64x64_merge_results = 4;

        /* Active partitions are limited only when bidir is enabled */
        ps_prms->limit_active_partitions = (ps_frm_prms->bidir_enabled) ? 1 : 0;
    }

    ps_prms->i4_enable_4x4_part = hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);

    if(ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
    {
        /* The command line values take precedence over the preset defaults */
        if(0 == layer_id)
        {
            ps_prms->i4_num_results_per_part = ps_me_coding_tools->u1_num_results_per_part_in_l0me;
        }
        else if(1 == layer_id)
        {
            ps_prms->i4_num_results_per_part = ps_me_coding_tools->u1_num_results_per_part_in_l1me;
        }
        else if((2 == layer_id) && (num_layers > 3))
        {
            ps_prms->i4_num_results_per_part = ps_me_coding_tools->u1_num_results_per_part_in_l2me;
        }
        else
        {
            /* No command line value for this layer; use the preset one */
            ps_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
                layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
        }

        /* Every search candidate location gets the same candidate count */
        memset(
            ps_prms->au1_num_fpel_search_cands,
            ps_me_coding_tools->u1_max_num_coloc_cands,
            sizeof(ps_prms->au1_num_fpel_search_cands));

        ps_prms->u1_max_2nx2n_tu_recur_cands = ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;

        ps_prms->u1_max_num_fpel_refine_centers =
            ps_me_coding_tools->u1_max_num_fpel_refine_centers;

        ps_prms->u1_max_num_subpel_refine_centers =
            ps_me_coding_tools->u1_max_num_subpel_refine_centers;
    }
    else
    {
        /* Everything is derived from the layer id and the quality preset */
        ps_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
            layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);

        hme_max_search_cands_per_search_cand_loc_populator(
            ps_frm_prms,
            ps_prms->au1_num_fpel_search_cands,
            layer_id,
            ps_me_coding_tools->e_me_quality_presets);

        ps_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
            layer_id, ps_me_coding_tools->e_me_quality_presets);

        ps_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
            layer_id, ps_me_coding_tools->e_me_quality_presets);

        ps_prms->u1_max_num_subpel_refine_centers = hme_determine_max_num_subpel_refine_centers(
            layer_id, ps_prms->u1_max_subpel_candts_2Nx2N, ps_prms->u1_max_subpel_candts_NxN);
    }

    /* For all layers but layer 0 the mv bank holds as many results as are */
    /* kept per partition.                                                 */
    if(layer_id != 0)
    {
        ps_prms->i4_num_mvbank_results = ps_prms->i4_num_results_per_part;
    }

    /* 4 * lambda */
    /* NOTE(review): both shift counts assume lambda_q_shift >= 2 - confirm */
    ps_prms->sdi_threshold =
        (ps_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
        (ps_frm_prms->lambda_q_shift - 2);

    ps_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
        MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
}
3796 
/**
********************************************************************************
*  @fn     hme_set_ctb_boundary_attrs
*
*  @brief  Fills the attributes of one CTB type, given how many 8x8 blks of the
*          CTB are present horizontally and vertically: the compacted list of
*          16x16 blks that are present, the 8x8 mask of each of them, the
*          16x16 valid bit field and the 32x32 / 64x64 merge flags
*
*  @param[out] ps_attrs : CTB attributes to be filled
*
*  @param[in] num_8x8_horz : Number of 8x8 blk columns present in the CTB
*
*  @param[in] num_8x8_vert : Number of 8x8 blk rows present in the CTB
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
{
    /* A 16x16 blk counts as present even if only one 8x8 column/row of it is */
    const S32 cols_16x16 = (num_8x8_horz + 1) >> 1;
    const S32 rows_16x16 = (num_8x8_vert + 1) >> 1;
    blk_ctb_attrs_t *ps_out_blk = &ps_attrs->as_blk_attrs[0];
    S32 valid_16x16_bits = 0;
    S32 merge_cols, merge_rows;
    S32 enc_pos;

    ps_attrs->u1_num_blks_in_ctb = (U08)(cols_16x16 * rows_16x16);

    /*************************************************************************/
    /* Walk the 16 possible 16x16 blks in encode order:                      */
    /* 0   1   4   5                                                         */
    /* 2   3   6   7                                                         */
    /* 8   9   12  13                                                        */
    /* 10  11  14  15                                                        */
    /* Only blks that lie inside the picture get an entry in as_blk_attrs,   */
    /* so the entries are compacted. E.g. with 2 columns and 4 rows of 16x16 */
    /* blks, encode positions 8-11 occupy entries 4-7 and the valid bit      */
    /* field becomes 0x0F0F (bit n set <=> encode position n is present).    */
    /*************************************************************************/
    for(enc_pos = 0; enc_pos < 16; enc_pos++)
    {
        const S32 col = gau1_encode_to_raster_x[enc_pos];
        const S32 row = gau1_encode_to_raster_y[enc_pos];

        if((col < cols_16x16) && (row < rows_16x16))
        {
            U08 u1_mask = 0xF;

            valid_16x16_bits |= (1 << enc_pos);

            ps_out_blk->u1_blk_id_in_full_ctb = enc_pos;
            ps_out_blk->u1_blk_x = col;
            ps_out_blk->u1_blk_y = row;

            /* Right 8x8 column (8x8 blks 1 and 3) falls outside: drop it */
            if(num_8x8_horz < ((col << 1) + 2))
            {
                u1_mask &= 0x5;
            }
            /* Bottom 8x8 row (8x8 blks 2 and 3) falls outside: drop it */
            if(num_8x8_vert < ((row << 1) + 2))
            {
                u1_mask &= 0x3;
            }
            ps_out_blk->u1_blk_8x8_mask = u1_mask;

            ps_out_blk++;
        }
    }

    ps_attrs->cu_16x16_valid_flag = valid_16x16_bits;

    /* A 32x32 blk can be merged only when it is complete both ways: bits   */
    /* 0 and 2 are the left 32x32 column, bits 0 and 1 the top 32x32 row.   */
    merge_cols = (num_8x8_horz >= 8) ? 0xF : ((num_8x8_horz >= 4) ? 0x5 : 0x0);
    merge_rows = (num_8x8_vert >= 8) ? 0xF : ((num_8x8_vert >= 4) ? 0x3 : 0x0);

    ps_attrs->u1_merge_to_32x32_flag = (U08)(merge_cols & merge_rows);

    /* 64x64 merge is attempted only when all four 32x32 blks can be merged */
    ps_attrs->u1_merge_to_64x64_flag = (0xF == ps_attrs->u1_merge_to_32x32_flag) ? 1 : 0;
}
3872 
/**
********************************************************************************
*  @fn     hme_set_ctb_attrs
*
*  @brief  Sets up the CTB attributes for each CTB type of a picture: the
*          centre (complete 64x64) CTB always, and the right edge, bottom edge
*          and bottom right corner CTBs when the picture width / height is not
*          a multiple of 64
*
*  @param[out] ps_attrs : Array of CTB attributes, indexed by CTB type
*
*  @param[in] wd : Picture width in pixels
*
*  @param[in] ht : Picture height in pixels
*
*  @return None
********************************************************************************
*/
void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
{
    /* Part of the picture that spills into an incomplete 64x64 CTB */
    const S32 wd_in_last_ctb = wd & 63;
    const S32 ht_in_last_ctb = ht & 63;

    /* Same remainders expressed as a count of 8x8 blks */
    const S32 num_8x8_in_last_col = wd_in_last_ctb >> 3;
    const S32 num_8x8_in_last_row = ht_in_last_ctb >> 3;

    if(0 != wd_in_last_ctb)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], num_8x8_in_last_col, 8);
    }

    if(0 != ht_in_last_ctb)
    {
        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, num_8x8_in_last_row);
    }

    if((0 != wd_in_last_ctb) && (0 != ht_in_last_ctb))
    {
        hme_set_ctb_boundary_attrs(
            &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], num_8x8_in_last_col, num_8x8_in_last_row);
    }

    /* Complete CTB: all 8x8 blks present */
    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
}
3895 
3896 /**
3897 ********************************************************************************
3898 *  @fn     hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3899 *
3900 *  @brief  When we have an mv with ref id "poc_to" for which predictor to be
3901 *          computed, and predictor is ref id "poc_from", this funciton returns
3902 *          scale factor in Q8 for such a purpose
3903 *
3904 *  @param[in] curr_poc : input picture poc
3905 *
3906 *  @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
3907 *
3908 *  @param[in] poc_to : POC of hte pic, pointed to by ref id to be scaled to
3909 *
3910 *  @return Scale factor in Q8 format
3911 ********************************************************************************
3912 */
hme_scale_for_ref_idx(S32 curr_poc,S32 poc_from,S32 poc_to)3913 S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
3914 {
3915     S32 td, tx, tb;
3916     S16 i2_scf;
3917     /*************************************************************************/
3918     /* Approximate scale factor: 256 * num / denom                           */
3919     /* num = curr_poc - poc_to, denom = curr_poc - poc_from                  */
3920     /* Exact implementation as per standard.                                 */
3921     /*************************************************************************/
3922 
3923     tb = HME_CLIP((curr_poc - poc_to), -128, 127);
3924     td = HME_CLIP((curr_poc - poc_from), -128, 127);
3925 
3926     tx = (16384 + (ABS(td) >> 1)) / td;
3927     //i2_scf = HME_CLIP((((tb*tx)+32)>>6), -128, 127);
3928     i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
3929 
3930     return (i2_scf);
3931 }
3932 
/**
********************************************************************************
*  @fn     hme_process_frm_init
*
*  @brief  HME frame level initialisation processing function for the encode
*          layer (layer 0). Builds the per frame reference look ups (ref id lc
*          to descriptor id / POC / L0 id / L1 id, past and future lists,
*          weighted prediction params, ref id bit costs, POC based scale
*          factors), the per reference buffer pointer lists and the layer 0
*          mv range.
*
*  @param[in,out] pv_me_ctxt : ME ctxt pointer (used as me_ctxt_t *)
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ps_frm_prms : Pointer to frame params
*
*  @param[in] i4_me_frm_id : Index into aps_me_frm_prms[] of the ME ctxt; selects
*                            the frame ctxt that is initialised
*
*  @param[in] i4_num_me_frm_pllel : Passed on to hme_find_descr_idx()
*
*  called only for encode layer
*
*  @return None
********************************************************************************
*/
void hme_process_frm_init(
    void *pv_me_ctxt,
    hme_ref_map_t *ps_ref_map,
    hme_frm_prms_t *ps_frm_prms,
    WORD32 i4_me_frm_id,
    WORD32 i4_num_me_frm_pllel)
{
    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
    me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];

    S32 i, j, desc_idx;
    S16 i2_max_x = 0, i2_max_y = 0;

    /* Set the Qp of current frm passed by caller. Required for intra cost */
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;
    ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;

    /* Keep a copy of the complete frame level params in the frame ctxt */
    /* (original note: bidir enabled or not)                             */
    ps_ctxt->s_frm_prms = *ps_frm_prms;

    /*************************************************************************/
    /* Set up the ref pic parameters across all layers. For this, we do the  */
    /* following: the application has given us a ref pic list, we go index   */
    /* by index and pick up the picture. A picture can be uniquely mapped    */
    /* to a POC. So we search all layer descriptor array to find the POC.    */
    /* Once found, we update all attributes in this descriptor.              */
    /* During this update process we also create an index of descriptor id   */
    /* to ref id mapping. It is important to find the same POC in the layers */
    /* descr structure since it holds the pyramid inputs for non encode      */
    /* layers. Apart from this, we also update the array containing the      */
    /* index of the descr. During processing, for ease of access, each layer */
    /* has a pointer to an array of pointers containing fxfy, fxhy, hxfy,    */
    /* hxhy and inputs for each ref; we update this too.                     */
    /*************************************************************************/
    ps_ctxt->num_ref_past = 0;
    ps_ctxt->num_ref_future = 0;
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        S32 ref_id_lc, idx;
        hme_ref_desc_t *ps_ref_desc;

        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
        /* Obtain the id of descriptor that contains this POC */
        idx = hme_find_descr_idx(
            ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);

        /* Update all layers in this descr with the reference attributes */
        hme_update_layer_desc(
            &ps_thrd_ctxt->as_ref_descr[idx],
            ps_ref_desc,
            0,
            1,  //ps_ctxt->num_layers,
            ps_ctxt->ps_curr_descr);

        /* Update the pointer holder for the recon planes. The layer 0 list  */
        /* pointers are made to point at row 0 of the frame ctxt arrays, so  */
        /* entries written through either of them further below land in the */
        /* same storage. Note this is done only when there is at least one   */
        /* reference, since it sits inside the reference loop.               */
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
            &ps_ctxt->apu1_list_rec_fxfy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
            &ps_ctxt->apu1_list_rec_hxfy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
            &ps_ctxt->apu1_list_rec_fxhy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
            &ps_ctxt->apu1_list_rec_hxhy[0][0];
        ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
            &ps_ctxt->apv_list_dep_mngr[0][0];

        /* Update the array having ref id lc to descr id mapping */
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;

        /* From ref id lc we need to work out the POC, So update this array */
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;

        /* When computing costs in L0 and L1 directions, we need the */
        /* respective ref id L0 and L1, so update this mapping */
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;

        /* A reference is treated as "past" when its POC is smaller than the */
        /* current POC, or unconditionally when the current POC is 0;        */
        /* otherwise it goes to the "future" list.                           */
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
            ps_ctxt->num_ref_past++;
        }
        else
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
            ps_ctxt->num_ref_future++;
        }

        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
        {
            /* copy the weight and offsets from current ref desc */
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;

            /* inv weight is stored in Q15 format */
            /* NOTE(review): divides by i2_weight; presumably the weight is  */
            /* never 0 when weighted prediction is enabled - confirm.        */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
        }
        else
        {
            /* store default wt and offset*/
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;

            /* inv weight is stored in Q15 format */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
        }
    }

    /* Terminate both lists with -1 after the last valid entry */
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;

    /*************************************************************************/
    /* Preparation of the TLU for bits for reference indices.                */
    /* Special case is that of numref = 2. (TEV)                             */
    /* Other cases uses UEV                                                  */
    /* With fewer than 2 references every entry stays 0.                     */
    /*************************************************************************/
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
    }

    if(ps_ref_map->i4_num_ref == 2)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
    }
    else if(ps_ref_map->i4_num_ref > 2)
    {
        /* Row 0 is looked up with the L0 id, row 1 with the L1 id */
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
        {
            S32 l0, l1;
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
        }
    }

    /*************************************************************************/
    /* Preparation of the scaling factors for reference indices. The scale   */
    /* factor depends on distance of the two ref indices from current input  */
    /* in terms of poc delta.                                                */
    /* The table is stored row wise with a row stride of MAX_NUM_REF: row i  */
    /* is the ref id scaled to, column j the ref id scaled from.             */
    /*************************************************************************/
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S16 i2_scf_q8;
            S32 poc_from, poc_to;

            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];

            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
        }
    }

    /*************************************************************************/
    /* We store simplified look ups for 4 hpel planes and inp y plane for    */
    /* every layer and for every ref id in the layer. So update these lookups*/
    /* The loop below runs for layer 0 only.                                 */
    /*************************************************************************/
    for(i = 0; i < 1; i++)
    {
        U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
        U08 **ppu1_inp;
        void **ppvlist_dep_mngr;
        layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];

        ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
        ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
        ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
        ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
        ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            hme_ref_desc_t *ps_ref_desc;
            hme_ref_buf_info_t *ps_buf_info;
            layer_ctxt_t *ps_layer;
            S32 ref_id_lc;

            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;

            /* desc_idx and ps_layer are computed here but not read again */
            /* in this function                                           */
            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
            ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i];

            /* Entries are indexed by j, the position in the reference map */
            ppu1_inp[j] = ps_buf_info->pu1_ref_src;
            ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
            ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
            ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
            ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
            ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;

            /* Update the curr descriptors reference pointers here */
            ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
            ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
            ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
            ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
            ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
        }
    }
    /*************************************************************************/
    /* The mv range for each layer is computed. For dyadic layers it will    */
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
    /* Here the loop runs for layer 0 only, so the range is taken directly   */
    /* from the frame params and the else branch is never executed.          */
    /*************************************************************************/
    for(i = 0; i < 1; i++)
    {
        layer_ctxt_t *ps_layer_ctxt;
        if(i == 0)
        {
            i2_max_x = ps_frm_prms->i2_mv_range_x;
            i2_max_y = ps_frm_prms->i2_mv_range_y;
        }
        else
        {
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
        }
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;

        /*********************************************************************/
        /* Every layer maintains a reference id lc to POC mapping. This is   */
        /* because the mapping is unique for every frm. Also, in next frm,   */
        /* we require colocated mvs which means scaling according to temporal*/
        /* distance. Hence this mapping needs to be maintained in every      */
        /* layer ctxt.                                                       */
        /* The first max_num_ref entries are first filled with all-ones      */
        /* bytes and then the entries of the current references are copied   */
        /* over them.                                                        */
        /*********************************************************************/
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
        if(ps_ref_map->i4_num_ref)
        {
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
                ps_ctxt->ai4_ref_idx_to_poc_lc,
                ps_ref_map->i4_num_ref * sizeof(S32));
        }
    }

    return;
}
4204 
/**
********************************************************************************
*  @fn     hme_coarse_process_frm_init
*
*  @brief  HME frame level initialisation processing function for the coarse
*          ME ctxt. Builds the per frame reference look ups (ref id lc to
*          descriptor id / POC / display num / L0 id / L1 id, past and future
*          lists, weighted prediction params, ref id bit costs, POC based
*          scale factors), the per reference input pointer lists and the mv
*          range for layers 1 to num_layers - 1.
*
*  @param[in,out] pv_me_ctxt : ME ctxt pointer (used as coarse_me_ctxt_t *)
*
*  @param[in] ps_ref_map : Reference map prms pointer
*
*  @param[in] ps_frm_prms : Pointer to frame params
*
*  @return None
********************************************************************************
*/
void hme_coarse_process_frm_init(
    void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
{
    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
    S32 i, j, desc_idx;
    S16 i2_max_x = 0, i2_max_y = 0;

    /* Set the Qp of current frm passed by caller. Required for intra cost */
    ps_ctxt->frm_qstep = ps_frm_prms->qstep;

    /* Keep a copy of the complete frame level params in the ctxt */
    /* (original note: bidir enabled or not)                      */
    ps_ctxt->s_frm_prms = *ps_frm_prms;

    /*************************************************************************/
    /* Set up the ref pic parameters across all layers. For this, we do the  */
    /* following: the application has given us a ref pic list, we go index   */
    /* by index and pick up the picture. A picture can be uniquely mapped    */
    /* to a POC. So we search all layer descriptor array to find the POC.    */
    /* Once found, we update all attributes in this descriptor.              */
    /* During this update process we also create an index of descriptor id   */
    /* to ref id mapping. It is important to find the same POC in the layers */
    /* descr structure since it holds the pyramid inputs for non encode      */
    /* layers. Apart from this, we also update the array containing the      */
    /* index of the descr. During processing, for ease of access, each layer */
    /* has a pointer to an array of pointers containing fxfy, fxhy, hxfy,    */
    /* hxhy and inputs for each ref; we update this too.                     */
    /*************************************************************************/
    ps_ctxt->num_ref_past = 0;
    ps_ctxt->num_ref_future = 0;
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        S32 ref_id_lc, idx;
        hme_ref_desc_t *ps_ref_desc;

        ps_ref_desc = &ps_ref_map->as_ref_desc[i];
        ref_id_lc = ps_ref_desc->i1_ref_id_lc;
        /* Obtain the id of descriptor that contains this POC */
        idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);

        /* Update all layers in this descr with the reference attributes */
        hme_update_layer_desc(
            &ps_ctxt->as_ref_descr[idx],
            ps_ref_desc,
            1,
            ps_ctxt->num_layers - 1,
            ps_ctxt->ps_curr_descr);

        /* Update the array having ref id lc to descr id mapping */
        ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;

        /* From ref id lc we need to work out the POC, So update this array */
        ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;

        /* From ref id lc we need to work out the display num, So update this array */
        ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;

        /* When computing costs in L0 and L1 directions, we need the */
        /* respective ref id L0 and L1, so update this mapping */
        ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
        ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;

        /* A reference is treated as "past" when its POC is smaller than the */
        /* current POC, or unconditionally when the current POC is 0;        */
        /* otherwise it goes to the "future" list.                           */
        if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 1;
            ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
            ps_ctxt->num_ref_past++;
        }
        else
        {
            ps_ctxt->au1_is_past[ref_id_lc] = 0;
            ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
            ps_ctxt->num_ref_future++;
        }
        if(1 == ps_ctxt->i4_wt_pred_enable_flag)
        {
            /* copy the weight and offsets from current ref desc */
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;

            /* inv weight is stored in Q15 format */
            /* NOTE(review): divides by i2_weight; presumably the weight is  */
            /* never 0 when weighted prediction is enabled - confirm.        */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
        }
        else
        {
            /* store default wt and offset*/
            ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;

            /* inv weight is stored in Q15 format */
            ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
                ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;

            ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
        }
    }

    /* Terminate both lists with -1 after the last valid entry */
    ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
    ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;

    /*************************************************************************/
    /* Preparation of the TLU for bits for reference indices.                */
    /* Special case is that of numref = 2. (TEV)                             */
    /* Other cases uses UEV                                                  */
    /* With fewer than 2 references every entry stays 0.                     */
    /*************************************************************************/
    for(i = 0; i < MAX_NUM_REF; i++)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
        ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
    }

    if(ps_ref_map->i4_num_ref == 2)
    {
        ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
        ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
    }
    else if(ps_ref_map->i4_num_ref > 2)
    {
        /* Row 0 is looked up with the L0 id, row 1 with the L1 id */
        for(i = 0; i < ps_ref_map->i4_num_ref; i++)
        {
            S32 l0, l1;
            l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
            l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
            ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
            ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
        }
    }

    /*************************************************************************/
    /* Preparation of the scaling factors for reference indices. The scale   */
    /* factor depends on distance of the two ref indices from current input  */
    /* in terms of poc delta.                                                */
    /* The table is stored row wise with a row stride of MAX_NUM_REF: row i  */
    /* is the ref id scaled to, column j the ref id scaled from.             */
    /*************************************************************************/
    for(i = 0; i < ps_ref_map->i4_num_ref; i++)
    {
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            S16 i2_scf_q8;
            S32 poc_from, poc_to;

            poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
            poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];

            i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
            ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
        }
    }

    /*************************************************************************/
    /* We store simplified look ups for inp y plane for                      */
    /* every layer and for every ref id in the layer.                        */
    /* Layer 0 is not touched here: the loop starts at layer 1.              */
    /*************************************************************************/
    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        U08 **ppu1_inp;

        ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
        for(j = 0; j < ps_ref_map->i4_num_ref; j++)
        {
            hme_ref_desc_t *ps_ref_desc;
            hme_ref_buf_info_t *ps_buf_info;
            layer_ctxt_t *ps_layer;
            S32 ref_id_lc;

            ps_ref_desc = &ps_ref_map->as_ref_desc[j];
            /* ps_buf_info is computed here but not read in this function */
            ps_buf_info = &ps_ref_desc->as_ref_info[i];
            ref_id_lc = ps_ref_desc->i1_ref_id_lc;

            /* The input plane is taken from layer i of the descriptor that */
            /* was mapped to this ref id lc in the reference loop above     */
            desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
            ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];

            ppu1_inp[j] = ps_layer->pu1_inp;
        }
    }
    /*************************************************************************/
    /* The mv range for each layer is computed. For dyadic layers it will    */
    /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
    /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
    /*************************************************************************/

    /* set to layer 0 search range params */
    i2_max_x = ps_frm_prms->i2_mv_range_x;
    i2_max_y = ps_frm_prms->i2_mv_range_y;

    for(i = 1; i < ps_ctxt->num_layers; i++)
    {
        layer_ctxt_t *ps_layer_ctxt;

        {
            /* Range of the previous layer scaled by the size ratio of this */
            /* layer to the previous one, then passed through FLOOR8        */
            /* (presumably rounds down to a multiple of 8 - confirm against */
            /* the macro definition)                                        */
            i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
            i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
        }
        ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
        ps_layer_ctxt->i2_max_mv_x = i2_max_x;
        ps_layer_ctxt->i2_max_mv_y = i2_max_y;

        /*********************************************************************/
        /* Every layer maintains a reference id lc to POC mapping. This is   */
        /* because the mapping is unique for every frm. Also, in next frm,   */
        /* we require colocated mvs which means scaling according to temporal*/
        /* distance. Hence this mapping needs to be maintained in every      */
        /* layer ctxt.                                                       */
        /* The first max_num_ref POC entries are first filled with all-ones  */
        /* bytes and then the entries of the current references are copied   */
        /* over them. The display num mapping is copied without such a       */
        /* pre-fill.                                                         */
        /*********************************************************************/
        memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
        if(ps_ref_map->i4_num_ref)
        {
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_poc_lc,
                ps_ctxt->ai4_ref_idx_to_poc_lc,
                ps_ref_map->i4_num_ref * sizeof(S32));
            memcpy(
                ps_layer_ctxt->ai4_ref_id_to_disp_num,
                ps_ctxt->ai4_ref_idx_to_disp_num,
                ps_ref_map->i4_num_ref * sizeof(S32));
        }
    }

    return;
}
4440 
4441 /**
4442 ********************************************************************************
4443 *  @fn     hme_process_frm
4444 *
4445 *  @brief  HME frame level processing function
4446 *
4447 *  @param[in] pv_me_ctxt : ME ctxt pointer
4448 *
4449 *  @param[in] ps_ref_map : Reference map prms pointer
4450 *
4451 *  @param[in] ppd_intra_costs : pointer to array of intra cost cost buffers for each layer
4452 *
4453 *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4454 *
4455 *  @param[in] pf_ext_update_fxn : function pointer to update CTb results
4456 *
4457 *  @param[in] pf_get_intra_cu_and_cost :function pointer to get intra cu size and cost
4458 *
4459 *  @param[in] ps_multi_thrd_ctxt :function pointer to get intra cu size and cost
4460 *
4461 *  @return Scale factor in Q8 format
4462 ********************************************************************************
4463 */
4464 
hme_process_frm(void * pv_me_ctxt,pre_enc_L0_ipe_encloop_ctxt_t * ps_l0_ipe_input,hme_ref_map_t * ps_ref_map,double ** ppd_intra_costs,hme_frm_prms_t * ps_frm_prms,PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,void * pv_coarse_layer,void * pv_multi_thrd_ctxt,S32 i4_frame_parallelism_level,S32 thrd_id,S32 i4_me_frm_id)4465 void hme_process_frm(
4466     void *pv_me_ctxt,
4467     pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
4468     hme_ref_map_t *ps_ref_map,
4469     double **ppd_intra_costs,
4470     hme_frm_prms_t *ps_frm_prms,
4471     PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
4472     void *pv_coarse_layer,
4473     void *pv_multi_thrd_ctxt,
4474     S32 i4_frame_parallelism_level,
4475     S32 thrd_id,
4476     S32 i4_me_frm_id)
4477 {
4478     refine_prms_t s_refine_prms;
4479     me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
4480     me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
4481 
4482     S32 lyr_job_type;
4483     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4484     layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
4485 
4486     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4487 
4488     lyr_job_type = ME_JOB_ENC_LYR;
4489     /*************************************************************************/
4490     /* Final L0 layer ME call                                                */
4491     /*************************************************************************/
4492     {
4493         /* Set the CTB attributes dependin on corner/rt edge/bot edge/center*/
4494         hme_set_ctb_attrs(ps_ctxt->as_ctb_bound_attrs, ps_ctxt->i4_wd, ps_ctxt->i4_ht);
4495 
4496         hme_set_refine_prms(
4497             &s_refine_prms,
4498             ps_ctxt->u1_encode[0],
4499             ps_ref_map->i4_num_ref,
4500             0,
4501             ps_ctxt->num_layers,
4502             ps_ctxt->num_layers_explicit_search,
4503             ps_thrd_ctxt->s_init_prms.use_4x4,
4504             ps_frm_prms,
4505             ppd_intra_costs,
4506             &ps_thrd_ctxt->s_init_prms.s_me_coding_tools);
4507 
4508         hme_refine(
4509             ps_thrd_ctxt,
4510             &s_refine_prms,
4511             pf_ext_update_fxn,
4512             ps_coarse_layer,
4513             ps_multi_thrd_ctxt,
4514             lyr_job_type,
4515             thrd_id,
4516             i4_me_frm_id,
4517             ps_l0_ipe_input);
4518 
4519         /* Set current ref pic status which will used as perv frame ref pic */
4520         if(i4_frame_parallelism_level)
4521         {
4522             ps_ctxt->i4_is_prev_frame_reference = 0;
4523         }
4524         else
4525         {
4526             ps_ctxt->i4_is_prev_frame_reference =
4527                 ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
4528                     ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
4529         }
4530     }
4531 
4532     return;
4533 }
4534 
4535 /**
4536 ********************************************************************************
4537 *  @fn     hme_coarse_process_frm
4538 *
4539 *  @brief  HME frame level processing function (coarse + refine)
4540 *
4541 *  @param[in] pv_me_ctxt : ME ctxt pointer
4542 *
4543 *  @param[in] ps_ref_map : Reference map prms pointer
4544 *
4545 *  @param[in] ps_frm_prms : pointer to Frame level parameters of HME
4546 *
4547 *  @param[in] ps_multi_thrd_ctxt :Multi thread related ctxt
4548 *
4549 *  @return Scale factor in Q8 format
4550 ********************************************************************************
4551 */
4552 
hme_coarse_process_frm(void * pv_me_ctxt,hme_ref_map_t * ps_ref_map,hme_frm_prms_t * ps_frm_prms,void * pv_multi_thrd_ctxt,WORD32 i4_ping_pong,void ** ppv_dep_mngr_hme_sync)4553 void hme_coarse_process_frm(
4554     void *pv_me_ctxt,
4555     hme_ref_map_t *ps_ref_map,
4556     hme_frm_prms_t *ps_frm_prms,
4557     void *pv_multi_thrd_ctxt,
4558     WORD32 i4_ping_pong,
4559     void **ppv_dep_mngr_hme_sync)
4560 {
4561     S16 i2_max;
4562     S32 layer_id;
4563     coarse_prms_t s_coarse_prms;
4564     refine_prms_t s_refine_prms;
4565     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4566     S32 lyr_job_type;
4567     multi_thrd_ctxt_t *ps_multi_thrd_ctxt;
4568 
4569     ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
4570     /*************************************************************************/
4571     /* Fire processing of all layers, starting with coarsest layer.          */
4572     /*************************************************************************/
4573     layer_id = ps_ctxt->num_layers - 1;
4574     i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
4575     i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
4576     s_coarse_prms.i4_layer_id = layer_id;
4577     {
4578         S32 log_start_step;
4579         /* Based on Preset, set the starting step size for Refinement */
4580         if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
4581         {
4582             log_start_step = 0;
4583         }
4584         else
4585         {
4586             log_start_step = 1;
4587         }
4588 
4589         s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
4590         s_coarse_prms.i4_start_step = 1 << log_start_step;
4591     }
4592     s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
4593     s_coarse_prms.do_full_search = 1;
4594     if(s_coarse_prms.do_full_search)
4595     {
4596         /* Set to 2 or 4 */
4597         if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
4598             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
4599         else if(ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets >= ME_MEDIUM_SPEED)
4600             s_coarse_prms.full_search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
4601     }
4602     s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
4603 
4604     /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME */
4605     s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
4606     s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
4607     s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
4608 
4609     hme_coarsest(ps_ctxt, &s_coarse_prms, ps_multi_thrd_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
4610 
4611     /* all refinement layer processed in the loop below */
4612     layer_id--;
4613     lyr_job_type = ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type + 1;
4614 
4615     /*************************************************************************/
4616     /* This loop will run for all refine layers (non- encode layers)          */
4617     /*************************************************************************/
4618     while(layer_id > 0)
4619     {
4620         hme_set_refine_prms(
4621             &s_refine_prms,
4622             ps_ctxt->u1_encode[layer_id],
4623             ps_ref_map->i4_num_ref,
4624             layer_id,
4625             ps_ctxt->num_layers,
4626             ps_ctxt->num_layers_explicit_search,
4627             ps_ctxt->s_init_prms.use_4x4,
4628             ps_frm_prms,
4629             NULL,
4630             &ps_ctxt->s_init_prms.s_me_coding_tools);
4631 
4632         hme_refine_no_encode(
4633             ps_ctxt,
4634             &s_refine_prms,
4635             ps_multi_thrd_ctxt,
4636             lyr_job_type,
4637             i4_ping_pong,
4638             ppv_dep_mngr_hme_sync);
4639 
4640         layer_id--;
4641         lyr_job_type++;
4642     }
4643 }
4644 /**
4645 ********************************************************************************
4646 *  @fn     hme_fill_neighbour_mvs
4647 *
4648 *  @brief  HME neighbour MV population function
4649 *
4650 *  @param[in] pps_mv_grid : MV grid array pointer
4651 *
4652 *  @param[in] i4_ctb_x : CTB pos X
4653 
4654 *  @param[in] i4_ctb_y : CTB pos Y
4655 *
4656 *  @remarks :  Needs to be populated for proper implementation of cost fxn
4657 *
4658 *  @return Scale factor in Q8 format
4659 ********************************************************************************
4660 */
hme_fill_neighbour_mvs(mv_grid_t ** pps_mv_grid,S32 i4_ctb_x,S32 i4_ctb_y,S32 i4_num_ref,void * pv_ctxt)4661 void hme_fill_neighbour_mvs(
4662     mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
4663 {
4664     /* TODO : Needs to be populated for proper implementation of cost fxn */
4665     ARG_NOT_USED(pps_mv_grid);
4666     ARG_NOT_USED(i4_ctb_x);
4667     ARG_NOT_USED(i4_ctb_y);
4668     ARG_NOT_USED(i4_num_ref);
4669     ARG_NOT_USED(pv_ctxt);
4670 }
4671 
4672 /**
4673 *******************************************************************************
4674 *  @fn     void hme_get_active_pocs_list(void *pv_me_ctxt,
4675 *                                       S32 *p_pocs_buffered_in_me)
4676 *
4677 *  @brief  Returns the list of active POCs in ME ctxt
4678 *
4679 *  @param[in] pv_me_ctxt : handle to ME context
4680 *
4681 *  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4682 *                                      populates with pocs active
4683 *
4684 *  @return   void
4685 *******************************************************************************
4686 */
hme_get_active_pocs_list(void * pv_me_ctxt,S32 i4_num_me_frm_pllel)4687 WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
4688 {
4689     me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
4690     S32 i, count = 0;
4691 
4692     for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
4693     {
4694         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
4695         S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
4696 
4697         if((i4_is_free == 0) && (poc != INVALID_POC))
4698         {
4699             count++;
4700         }
4701     }
4702     if(count == (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
4703     {
4704         return 1;
4705     }
4706     else
4707     {
4708         return 0;
4709     }
4710 }
4711 
4712 /**
4713 *******************************************************************************
4714 *  @fn     void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
4715 *                                       S32 *p_pocs_buffered_in_me)
4716 *
4717 *  @brief  Returns the list of active POCs in ME ctxt
4718 *
4719 *  @param[in] pv_me_ctxt : handle to ME context
4720 *
4721 *  @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
4722 *                                      populates with pocs active
4723 *
4724 *  @return   void
4725 *******************************************************************************
4726 */
hme_coarse_get_active_pocs_list(void * pv_me_ctxt,S32 * p_pocs_buffered_in_me)4727 void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
4728 {
4729     coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
4730     S32 i, count = 0;
4731 
4732     for(i = 0; i < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
4733     {
4734         S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[1]->i4_poc;
4735 
4736         if(poc != -1)
4737         {
4738             p_pocs_buffered_in_me[count] = poc;
4739             count++;
4740         }
4741     }
4742     p_pocs_buffered_in_me[count] = -1;
4743 }
4744 
/**
*******************************************************************************
*  @fn     S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers,
*                               S32 encode)
*
*  @brief  Returns the block size used for motion search in a given layer
*
*  @param[in] use_4x4 : not used by this function
*
*  @param[in] layer_id : id of the layer being queried
*
*  @param[in] n_layers : total number of layers; layer (n_layers - 1) is
*                        treated as the coarsest layer
*
*  @param[in] encode : non-zero if the layer is an encode layer
*
*  @return   4 for the coarsest layer (checked first, so it also wins when
*            the coarsest layer is layer 0 or an encode layer), 16 for
*            layer 0 or any encode layer, 8 for all other layers
*******************************************************************************
*/
S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
{
    const S32 coarsest_layer_id = n_layers - 1;

    (void)use_4x4;

    /* Coarsest layer uses 4x4 blks */
    if(coarsest_layer_id == layer_id)
    {
        return 4;
    }

    /* Lowermost layer / encode layer uses 16x16, */
    /* intermediate non encode layers use 8       */
    return ((0 == layer_id) || encode) ? 16 : 8;
}
4756