1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file hme_defs.h
23 *
24 * \brief
25 *    Important definitions, enumerations, macros and structures used by ME
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _HME_DEFS_H_
37 #define _HME_DEFS_H_
38 
39 /*****************************************************************************/
40 /* Constant Macros                                                           */
41 /*****************************************************************************/
42 /**
43 *******************************************************************************
44 @brief Blk size of the CTB in the max possible case
45 *******************************************************************************
46  */
47 #define CTB_BLK_SIZE 64
48 
49 /**
50 *******************************************************************************
51 @brief Maximum number of results per partition
52 *******************************************************************************
53  */
54 #define MAX_RESULTS_PER_PART 2
55 
56 /**
57 *******************************************************************************
58 @brief Not used currently
59 *******************************************************************************
60  */
61 #define MAX_NUM_UNIFIED_RESULTS 10
62 #define MAX_NUM_CTB_NODES 10
63 
64 /**
65 *******************************************************************************
66 @brief For 64x64 CTB, we have 16x16 MV grid for prediction purposes (cost calc)
67 This has 1 padding at boundaries for causal neighbours
68 *******************************************************************************
69  */
70 #define CTB_MV_GRID_PAD 1
71 
/**
*******************************************************************************
@brief Number of bits charged per CABAC bin (a fractional, floating-point value)
*******************************************************************************
 */
#define HME_CABAC_BITS_PER_BIN 0.5

/**
*******************************************************************************
@brief Bin count to bit count conversion.
Scales the bin count x by HME_CABAC_BITS_PER_BIN, adds 0.5 so that the
truncating conversion rounds to nearest, and converts the result to U08.
The conversion is spelled as a C cast; a function-style U08(...) cast is only
accepted by C++ compilers (assumes U08 is an integer typedef - confirm).
*******************************************************************************
 */
#define HME_GET_CAB_BIT(x) ((U08)(((x) * HME_CABAC_BITS_PER_BIN) + 0.5))
85 
/**
*******************************************************************************
@brief Number of columns in the CTB MV grid: CTB_BLK_SIZE divided by 4, plus
CTB_MV_GRID_PAD padding entries on each of the two sides
*******************************************************************************
 */
#define NUM_COLUMNS_IN_CTB_GRID (((CTB_BLK_SIZE) / 4) + ((CTB_MV_GRID_PAD) * 2))

/**
*******************************************************************************
@brief Number of rows in the CTB MV grid; the grid is square, so this equals
the column count
*******************************************************************************
 */
#define NUM_ROWS_IN_CTB_GRID (NUM_COLUMNS_IN_CTB_GRID)

/**
*******************************************************************************
@brief Total number of MVs held in the CTB grid for prediction purposes
(rows times columns)
*******************************************************************************
 */
#define NUM_MVS_IN_CTB_GRID ((NUM_ROWS_IN_CTB_GRID) * (NUM_COLUMNS_IN_CTB_GRID))
106 
107 /**
108 *******************************************************************************
109 @brief Max number of candidates used for refinement during CU merge stage
110 *******************************************************************************
111  */
112 #define MAX_MERGE_CANDTS 64
113 
114 /**
115 *******************************************************************************
116 @brief For BIDIR refinement, we use 2I-P0 as input, done max at CTB level, so
117 stride for this input is 64
118 *******************************************************************************
119  */
120 #define BACK_PREDICTION_INPUT_STRIDE 64
121 
122 /**
123 *******************************************************************************
124 @brief We basically store an impossible and unique MV to identify intra blks
125 or CUs
126 *******************************************************************************
127  */
128 #define INTRA_MV 0x4000
129 
/**
*******************************************************************************
@brief Largest CTB size supported by HME
*******************************************************************************
 */
#define HME_MAX_CTB_SIZE 64

/**
*******************************************************************************
@brief Maximum number of 16x16 blks in a CTB of HME_MAX_CTB_SIZE. The basic
search unit in the encode layer is 16x16
*******************************************************************************
 */
#define HME_MAX_16x16_IN_CTB (((HME_MAX_CTB_SIZE) / 16) * ((HME_MAX_CTB_SIZE) / 16))

/**
*******************************************************************************
@brief Maximum number of 8x8 blks in a CTB of HME_MAX_CTB_SIZE; this is also
the maximum number of CUs possible in a CTB
*******************************************************************************
 */
#define HME_MAX_8x8_IN_CTB (((HME_MAX_CTB_SIZE) / 8) * ((HME_MAX_CTB_SIZE) / 8))
152 
153 /**
154 *******************************************************************************
155 @brief Maximum number of init candts supported for refinement search.
156 *******************************************************************************
157  */
158 #define MAX_INIT_CANDTS 60
159 
160 /**
161 *******************************************************************************
162 @brief Maximum MV in X and Y directions in fullpel units allowed in any layer
163 Any computed range for MV has to be within this
164 *******************************************************************************
165  */
166 #define MAX_MV_X_FINEST 1024
167 #define MAX_MV_Y_FINEST 512
168 
169 #define MAX_NUM_RESULTS 10
170 
171 #define USE_MODIFIED 1
172 
173 #define ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 1
174 
175 #define ENABLE_EXPLICIT_SEARCH_IN_PQ 0
176 
/**
*******************************************************************************
@brief Sizing of the bin count. The reasoning: an error of 4 per component is
 tolerable in the global mv at the coarsest layer; with a search range of
 1024x512 the mv range in the coarse layer is 128x64, so the total number of
 bins is 256/4 x 128/4 = 2K, i.e. 1 << 11
*******************************************************************************
 */
#define LOG_MAX_NUM_BINS 11
#define MAX_NUM_BINS (1 << (LOG_MAX_NUM_BINS))
186 
187 #define NEXT_BLOCK_OFFSET_IN_L0_ME 22
188 
189 #define PREV_BLOCK_OFFSET_IN_L0_ME 6
190 
191 #define COLOCATED_BLOCK_OFFSET 2
192 
193 #define COLOCATED_4X4_NEXT_BLOCK_OFFSET 14
194 
195 #define MAP_X_MAX 16
196 
197 #define MAP_Y_MAX 16
198 
199 #define NUM_POINTS_IN_RECTANGULAR_GRID 9
200 
201 /*
202 ******************************************************************************
203 @brief Maximum number of elements in the sigmaX and sigmaX-Square array
204 computed at 4x4 level for any CU size
205 ******************************************************************************
206 */
207 #define MAX_NUM_SIGMAS_4x4 256
208 
209 /*****************************************************************************/
210 /* Function Macros                                                           */
211 /*****************************************************************************/
212 
/**
*******************************************************************************
@brief Calculates the number of blks in a picture for blks of side
(1 << shift). Width and height are rounded up to a whole number of blks, so
partial blks at the right and bottom edges are counted.

@param wd       [in]  picture width
@param ht       [in]  picture height
@param shift    [in]  log2 of the blk side
@param num_cols [out] lvalue receiving the number of blk columns
@param num_blks [out] lvalue receiving num_cols times the number of blk rows

Every argument is parenthesised in the expansion so that expression arguments
keep their intended precedence. Arguments are evaluated more than once, so
they must be free of side effects.
*******************************************************************************
 */
#define GET_NUM_BLKS_IN_PIC(wd, ht, shift, num_cols, num_blks) \
    { \
        S32 i4_num_rows_, i4_rnd_; \
        /* Adding (blk side - 1) before the shift rounds the division up */ \
        i4_rnd_ = (1 << (shift)) - 1; \
        (num_cols) = ((wd) + i4_rnd_) >> (shift); \
        i4_num_rows_ = ((ht) + i4_rnd_) >> (shift); \
        (num_blks) = (num_cols) * i4_num_rows_; \
    }
227 
/**
*******************************************************************************
@brief Counts the set bits among the 9 least significant bits (bits 0..8) of
the mask a and stores the count in b. Bits above bit 8 are ignored.

@param a [in]  integer bit mask; evaluated exactly once
@param b [out] lvalue receiving the count (0..9)
*******************************************************************************
 */
#define COUNT_CANDS(a, b) \
    { \
        /* Keep only bits 0..8; the mask argument is captured once */ \
        unsigned int u4_cand_bits_ = ((unsigned int)(a)) & 0x1FFU; \
        int i4_cand_count_ = 0; \
        while(u4_cand_bits_ != 0U) \
        { \
            i4_cand_count_ += (int)(u4_cand_bits_ & 1U); \
            u4_cand_bits_ >>= 1; \
        } \
        (b) = i4_cand_count_; \
    }
234 
/**
*******************************************************************************
@brief Fills a search node from a stored MV and reference index.

@param node  [out] search node; s_mv, i1_ref_idx, u1_is_avail and
                   u1_subpel_done are written
@param mv    [in]  pointer to the source MV (i2_mv_x / i2_mv_y)
@param pref  [in]  pointer to the source reference index
@param refid [in]  reference index assigned when the source is unavailable
@param shift [in]  right shift applied to both MV components

The source counts as unavailable when its reference index is negative or its
x component equals INTRA_MV; the node then gets a zero MV, refid and
u1_is_avail = 0. u1_subpel_done is set to 1 only when shift is 0.
pref and refid are parenthesised so that expression arguments such as
(p + 1) are dereferenced and assigned as intended.
*******************************************************************************
 */
#define COPY_MV_TO_SEARCH_NODE(node, mv, pref, refid, shift) \
    { \
        (node)->s_mv.i2_mvx = (mv)->i2_mv_x; \
        (node)->s_mv.i2_mvy = (mv)->i2_mv_y; \
        (node)->i1_ref_idx = *(pref); \
        (node)->u1_is_avail = 1; \
        \
        /* Can set the availability flag for MV Pred purposes */ \
        if(((node)->i1_ref_idx < 0) || ((node)->s_mv.i2_mvx == INTRA_MV)) \
        { \
            (node)->u1_is_avail = 0; \
            (node)->i1_ref_idx = (refid); \
            (node)->s_mv.i2_mvx = 0; \
            (node)->s_mv.i2_mvy = 0; \
        } \
        (node)->s_mv.i2_mvx >>= (shift); \
        (node)->s_mv.i2_mvy >>= (shift); \
        (node)->u1_subpel_done = (shift) ? 0 : 1; \
    }
254 
/**
*******************************************************************************
@brief Adds the L1 distance between an MV and a cluster centroid to a running
total.

@param ps_mv                  [in]  pointer to the MV (mvx / mvy)
@param ps_data                [in]  pointer to the cluster data; its
                                    s_centroid.i4_pos_x_q8 / i4_pos_y_q8 hold
                                    the centroid in Q8
@param cumulative_mv_distance [in,out] lvalue the distance is added to

The MV is promoted to Q8, the centroid is subtracted, and each difference is
rounded back with +128 and a right shift by 8 before the absolute values are
summed. The promotion multiplies by 256 instead of shifting left, because a
left shift of a negative value is undefined behaviour in C.
*******************************************************************************
 */
#define COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance) \
    { \
        S32 i4_mvdx_q8_ = ((ps_mv)->mvx * 256) - (ps_data)->s_centroid.i4_pos_x_q8; \
        S32 i4_mvdy_q8_ = ((ps_mv)->mvy * 256) - (ps_data)->s_centroid.i4_pos_y_q8; \
        \
        /* Round the Q8 differences to the nearest integer */ \
        S32 i4_mvdx_ = (i4_mvdx_q8_ + (1 << 7)) >> 8; \
        S32 i4_mvdy_ = (i4_mvdy_q8_ + (1 << 7)) >> 8; \
        \
        (cumulative_mv_distance) += ABS(i4_mvdx_) + ABS(i4_mvdy_); \
    }
272 
/**
*******************************************************************************
@brief Records a candidate (mvx_cur, mvy_cur, ref_idx) in a store of MVs seen
so far and updates the candidate statistics.

@param ps_mv_store         [in,out] array of stored candidates (s_mv, i1_ref_idx)
@param num_mvs_stored      [in,out] lvalue holding the number of valid entries
@param mvx_cur             [in]  x component of the candidate
@param mvy_cur             [in]  y component of the candidate
@param stats_struct        [in,out] statistics object; its
                                    f_num_cands_being_processed and
                                    f_num_duplicates_amongst_processed
                                    counters are incremented
@param check_for_duplicate [out] lvalue set to 1 when the candidate is already
                                 in the store, 0 otherwise
@param ref_idx             [in]  reference index of the candidate

A candidate that is not found is appended and num_mvs_stored is incremented.
No capacity check is made; the caller must guarantee room for one more entry.
The duplicate branch reports 1 through check_for_duplicate, matching
CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES; storing 0 there would make the flag
indistinguishable from the "not found" case.
*******************************************************************************
 */
#define STATS_COLLECTOR_MV_INSERT( \
    ps_mv_store, num_mvs_stored, mvx_cur, mvy_cur, stats_struct, check_for_duplicate, ref_idx) \
    { \
        S32 i4_j; \
        (stats_struct).f_num_cands_being_processed++; \
        (check_for_duplicate) = 0; \
        \
        for(i4_j = 0; i4_j < (num_mvs_stored); i4_j++) \
        { \
            if(((ps_mv_store)[i4_j].s_mv.i2_mvx == (mvx_cur)) && \
               ((ps_mv_store)[i4_j].s_mv.i2_mvy == (mvy_cur)) && \
               ((ps_mv_store)[i4_j].i1_ref_idx == (ref_idx))) \
            { \
                (stats_struct).f_num_duplicates_amongst_processed++; \
                (check_for_duplicate) = 1; \
                break; \
            } \
        } \
        \
        /* The loop ran to completion, so the candidate is new: append it */ \
        if(i4_j == (num_mvs_stored)) \
        { \
            (ps_mv_store)[i4_j].s_mv.i2_mvx = (mvx_cur); \
            (ps_mv_store)[i4_j].s_mv.i2_mvy = (mvy_cur); \
            (ps_mv_store)[i4_j].i1_ref_idx = (ref_idx); \
            (num_mvs_stored)++; \
        } \
    }
300 
/**
*******************************************************************************
@brief Walks as_cluster_data from index 0 until num_clusters valid clusters
have been seen. For every invalid cluster met on the way whose ref_id is not
-1, the per-reference counter au1_num_clusters[ref_id] is decremented.

@param ps_cluster [in,out] pointer to the cluster block (num_clusters,
                           as_cluster_data, au1_num_clusters)

The walk is bounded only by the count of valid clusters, so the array must
contain at least num_clusters valid entries.
*******************************************************************************
 */
#define UPDATE_CLUSTER_METADATA_POST_MERGE(ps_cluster) \
    { \
        S32 i4_idx_ = 0; \
        S32 i4_valid_seen_ = 0; \
        \
        while(i4_valid_seen_ < (ps_cluster)->num_clusters) \
        { \
            if((ps_cluster)->as_cluster_data[i4_idx_].is_valid_cluster) \
            { \
                i4_valid_seen_++; \
            } \
            else if(-1 != (ps_cluster)->as_cluster_data[i4_idx_].ref_id) \
            { \
                (ps_cluster)->au1_num_clusters[(ps_cluster)->as_cluster_data[i4_idx_].ref_id]--; \
            } \
            i4_idx_++; \
        } \
    }
322 
/**
*******************************************************************************
@brief Stores the chosen reference ids and the reference count in a cluster
block.

@param ps_cluster_blk  [out] pointer to the cluster block
@param uni_ref_val     [in]  value written to the best_uni_ref member
@param alt_ref_val     [in]  value written to the best_alt_ref member
@param num_ref         [in]  value written to the num_refs member

The macro parameters deliberately do not share names with the struct members:
a parameter named like a member is substituted inside "->member" as well, so
the macro would only compile when the caller's arguments happened to be
variables carrying exactly the member names.
*******************************************************************************
 */
#define SET_VALUES_FOR_TOP_REF_IDS(ps_cluster_blk, uni_ref_val, alt_ref_val, num_ref) \
    { \
        (ps_cluster_blk)->best_uni_ref = (uni_ref_val); \
        (ps_cluster_blk)->best_alt_ref = (alt_ref_val); \
        (ps_cluster_blk)->num_refs = (num_ref); \
    }
329 
/* Half-extent of the dedup node map in x and y. Guarded so that the values  */
/* are only supplied here when they have not already been defined.           */
#ifndef MAP_X_MAX
#define MAP_X_MAX 16
#endif
#ifndef MAP_Y_MAX
#define MAP_Y_MAX 16
#endif

/**
*******************************************************************************
@brief Duplicate check against a bitmap of MVs already visited around a
centre MV, inserting the MV into the bitmap when it is new.

@param ps_dedup_enabler    [in,out] pointer to (or array of)
                                    subpel_dedup_enabler_t; element 0 supplies
                                    the centre MV (i2_mv_x / i2_mv_y) and the
                                    bitmap au4_node_map
@param num_cands           [in]  not referenced by the macro
@param mvx                 [in]  x component of the MV to test
@param mvy                 [in]  y component of the MV to test
@param check_for_duplicate [out] lvalue set to 1 if the MV was already marked,
                                 0 otherwise

The bitmap has one U32 word per x offset (index MAP_X_MAX + dx) and one bit
per y offset (bit MAP_Y_MAX + dy). Only offsets with -MAP_X_MAX <= dx <
MAP_X_MAX and -MAP_Y_MAX <= dy < MAP_Y_MAX are tracked; an MV outside that
window is neither recorded nor ever reported as a duplicate.
The word is handled as U32 throughout, since bit 31 is in use when dy is
MAP_Y_MAX - 1 and must not pass through a signed temporary.
*******************************************************************************
 */
#define CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES( \
    ps_dedup_enabler, num_cands, mvx, mvy, check_for_duplicate) \
    { \
        subpel_dedup_enabler_t *ps_dedup_; \
        S32 i4_mvdx_; \
        S32 i4_mvdy_; \
        \
        (check_for_duplicate) = 0; \
        \
        ps_dedup_ = &(ps_dedup_enabler)[0]; \
        i4_mvdx_ = (mvx) - ps_dedup_->i2_mv_x; \
        i4_mvdy_ = (mvy) - ps_dedup_->i2_mv_y; \
        \
        if(((i4_mvdx_ < MAP_X_MAX) && (i4_mvdx_ >= -MAP_X_MAX)) && \
           ((i4_mvdy_ < MAP_Y_MAX) && (i4_mvdy_ >= -MAP_Y_MAX))) \
        { \
            U32 *pu4_column_ = &ps_dedup_->au4_node_map[MAP_X_MAX + i4_mvdx_]; \
            U32 u4_row_bit_ = 1U << (MAP_Y_MAX + i4_mvdy_); \
            \
            if(0 == (*pu4_column_ & u4_row_bit_)) \
            { \
                /* First visit: mark the position */ \
                *pu4_column_ |= u4_row_bit_; \
            } \
            else \
            { \
                (check_for_duplicate) = 1; \
            } \
        } \
    }
370 
/**
*******************************************************************************
@brief Invalidates outlier clusters of references that hold too many
clusters.

@param ps_cluster_blk [in,out] pointer to the cluster block (num_clusters,
                               au1_num_clusters, as_cluster_data)
@param sdi_threshold  [in]  a cluster qualifies as an outlier only if the sdi
                            of its single MV is below this value

A valid cluster is an outlier candidate when it holds exactly one MV, that
MV's sdi is below sdi_threshold, and its reference currently has more than
MAX_NUM_CLUSTERS_IN_ONE_REF_IDX clusters. Candidates are sorted by ascending
sdi and invalidated in that order, each only while its reference still
exceeds the limit; num_clusters and au1_num_clusters are decremented for every
cluster removed.
Both arguments are parenthesised in the expansion and are evaluated more than
once.
*******************************************************************************
 */
#define BUMP_OUTLIER_CLUSTERS(ps_cluster_blk, sdi_threshold) \
    { \
        /* The extra last element is scratch space for swaps while sorting */ \
        outlier_data_t as_outliers_[MAX_NUM_CLUSTERS_64x64 + 1]; \
        \
        S32 i4_j_, i4_k_; \
        \
        S32 i4_num_evaluated_ = 0; \
        S32 i4_num_clusters_ = (ps_cluster_blk)->num_clusters; \
        S32 i4_num_outliers_ = 0; \
        \
        /* Pass 1: collect the outlier candidates among the valid clusters */ \
        for(i4_j_ = 0; i4_num_evaluated_ < i4_num_clusters_; i4_j_++) \
        { \
            cluster_data_t *ps_data_ = &(ps_cluster_blk)->as_cluster_data[i4_j_]; \
            \
            if(!ps_data_->is_valid_cluster) \
            { \
                continue; \
            } \
            \
            i4_num_evaluated_++; \
            \
            if((ps_data_->num_mvs == 1) && (ps_data_->as_mv[0].sdi < (sdi_threshold)) && \
               ((ps_cluster_blk)->au1_num_clusters[ps_data_->ref_id] > \
                MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \
            { \
                as_outliers_[i4_num_outliers_].cluster_id = i4_j_; \
                as_outliers_[i4_num_outliers_].ref_idx = ps_data_->ref_id; \
                as_outliers_[i4_num_outliers_].sdi = ps_data_->as_mv[0].sdi; \
                i4_num_outliers_++; \
            } \
        } \
        \
        /* Pass 2: sort the candidates by ascending sdi */ \
        for(i4_j_ = 0; i4_j_ < (i4_num_outliers_ - 1); i4_j_++) \
        { \
            for(i4_k_ = (i4_j_ + 1); i4_k_ < i4_num_outliers_; i4_k_++) \
            { \
                if(as_outliers_[i4_j_].sdi > as_outliers_[i4_k_].sdi) \
                { \
                    as_outliers_[MAX_NUM_CLUSTERS_64x64] = as_outliers_[i4_j_]; \
                    as_outliers_[i4_j_] = as_outliers_[i4_k_]; \
                    as_outliers_[i4_k_] = as_outliers_[MAX_NUM_CLUSTERS_64x64]; \
                } \
            } \
        } \
        \
        /* Pass 3: drop candidates while their reference is over the limit */ \
        for(i4_j_ = 0; i4_j_ < i4_num_outliers_; i4_j_++) \
        { \
            S32 i4_ref_idx_ = as_outliers_[i4_j_].ref_idx; \
            \
            if((ps_cluster_blk)->au1_num_clusters[i4_ref_idx_] > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX) \
            { \
                (ps_cluster_blk)->as_cluster_data[as_outliers_[i4_j_].cluster_id] \
                    .is_valid_cluster = 0; \
                (ps_cluster_blk)->num_clusters--; \
                (ps_cluster_blk)->au1_num_clusters[i4_ref_idx_]--; \
            } \
        } \
    }
428 
/**
*******************************************************************************
@brief Appends a cluster centroid, rounded from Q8 to integer precision, to
the MV list of a given reference.

@param ps_cluster_data [in,out] pointer to the structure holding as_mv_list
                                and ai4_ref_id_valid
@param ps_range_prms   [in]  range parameters handed to CLIP_MV_WITHIN_RANGE
@param ps_list         [out] lvalue set to the selected MV list
@param ps_mv           [out] lvalue set to the newly written list entry
@param is_ref_in_l0    [in]  the first index used is !(is_ref_in_l0)
@param ref_idx         [in]  second index into as_mv_list / ai4_ref_id_valid

NOTE: the centroid is NOT a parameter. The expansion reads a variable named
ps_centroid (i4_pos_x_q8 / i4_pos_y_q8) that must be in scope at the call
site.
The new entry is written at index num_mvs of the list without a capacity
check, clipped through CLIP_MV_WITHIN_RANGE, the reference is flagged valid,
and num_mvs is incremented.
*******************************************************************************
 */
#define ADD_CLUSTER_CENTROID_AS_CANDS_FOR_BLK_MERGE( \
    ps_cluster_data, ps_range_prms, ps_list, ps_mv, is_ref_in_l0, ref_idx) \
    { \
        (ps_list) = &(ps_cluster_data)->as_mv_list[!(is_ref_in_l0)][(ref_idx)]; \
        (ps_mv) = &(ps_list)->as_mv[(ps_list)->num_mvs]; \
        \
        /* Q8 to integer with rounding to nearest */ \
        (ps_mv)->i2_mvx = (ps_centroid->i4_pos_x_q8 + (1 << 7)) >> 8; \
        (ps_mv)->i2_mvy = (ps_centroid->i4_pos_y_q8 + (1 << 7)) >> 8; \
        \
        CLIP_MV_WITHIN_RANGE((ps_mv)->i2_mvx, (ps_mv)->i2_mvy, (ps_range_prms), 0, 0, 0); \
        \
        (ps_cluster_data)->ai4_ref_id_valid[!(is_ref_in_l0)][(ref_idx)] = 1; \
        \
        (ps_list)->num_mvs++; \
    }
444 
/**
*******************************************************************************
@brief Fills a search candidate from a stored MV and reference index. Same
steps as COPY_MV_TO_SEARCH_NODE, except that the node's MV is reached through
its ps_mv pointer instead of an embedded s_mv member.

@param node  [out] search node; *ps_mv, i1_ref_idx, u1_is_avail and
                   u1_subpel_done are written (ps_mv must already point to
                   valid storage)
@param mv    [in]  pointer to the source MV (i2_mv_x / i2_mv_y)
@param pref  [in]  pointer to the source reference index
@param refid [in]  reference index assigned when the source is unavailable
@param shift [in]  right shift applied to both MV components

The source counts as unavailable when its reference index is negative or its
x component equals INTRA_MV; the node then gets a zero MV, refid and
u1_is_avail = 0. u1_subpel_done is set to 1 only when shift is 0.
pref and refid are parenthesised so that expression arguments are
dereferenced and assigned as intended.
*******************************************************************************
 */
#define COPY_SEARCH_CANDIDATE_DATA(node, mv, pref, refid, shift) \
    { \
        (node)->ps_mv->i2_mvx = (mv)->i2_mv_x; \
        (node)->ps_mv->i2_mvy = (mv)->i2_mv_y; \
        (node)->i1_ref_idx = *(pref); \
        (node)->u1_is_avail = 1; \
        \
        /* Can set the availability flag for MV Pred purposes */ \
        if(((node)->i1_ref_idx < 0) || ((node)->ps_mv->i2_mvx == INTRA_MV)) \
        { \
            (node)->u1_is_avail = 0; \
            (node)->i1_ref_idx = (refid); \
            (node)->ps_mv->i2_mvx = 0; \
            (node)->ps_mv->i2_mvy = 0; \
        } \
        (node)->ps_mv->i2_mvx >>= (shift); \
        (node)->ps_mv->i2_mvy >>= (shift); \
        (node)->u1_subpel_done = (shift) ? 0 : 1; \
    }
/**
*******************************************************************************
* @macro MIN_NODE
* @brief Selects, out of two search node pointers, the one whose i4_tot_cost
*        is lower. On equal cost the second argument (b) is selected.
* @note  Each argument may be expanded twice; avoid side effects.
*******************************************************************************
 */
#define MIN_NODE(a, b) (((b)->i4_tot_cost <= (a)->i4_tot_cost) ? (b) : (a))
471 
/**
*******************************************************************************
* @macro MAX_NODE
* @brief Selects, out of two search node pointers, the one whose i4_tot_cost
*        is higher. On equal cost the first argument (a) is selected.
* @note  Each argument may be expanded twice; avoid side effects.
*******************************************************************************
 */
#define MAX_NODE(a, b) (((b)->i4_tot_cost > (a)->i4_tot_cost) ? (b) : (a))
479 
/**
******************************************************************************
 *  @macro  HME_INV_WT_PRED, HME_INV_WT_PRED1
 *  @brief Implements inverse of wt pred formula. Actual wt pred formula is
 *  (((input * wt) + rnd) >> shift) + offset
 *
 *  HME_INV_WT_PRED removes the offset, scales up by (1 << shift) and divides
 *  by wt with a rounding term of wt / 2.
 *
 *  HME_INV_WT_PRED1 removes the offset, scales up by (1 << shift), multiplies
 *  by wt and drops 15 bits with a rounding term of (1 << 14).
 *  NOTE(review): this presumably expects wt to be a Q15 reciprocal of the
 *  weight rather than the weight itself - confirm against the callers.
 *
 *  @param inp   weighted sample value
 *  @param wt    weight (see the note above for HME_INV_WT_PRED1)
 *  @param off   offset that was added by weighted prediction
 *  @param shift shift that was applied by weighted prediction
******************************************************************************
*/
#define HME_INV_WT_PRED(inp, wt, off, shift) (((((inp) - (off)) << (shift)) + ((wt) >> 1)) / (wt))
#define HME_INV_WT_PRED1(inp, wt, off, shift) \
    (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)
490 
/**
******************************************************************************
 *  @macro  HME_WT_PRED
 *  @brief Weighted combination of two samples:
 *         ((w0 * p0 + w1 * p1) >> shift) + rnd, computed in S32.
 *
 *  NOTE(review): rnd is added after the shift here, whereas a rounding term
 *  is usually added before it - confirm this is intended before relying on
 *  the "as per spec" wording this macro was originally documented with.
 *
 *  @param p0, p1 the two input samples
 *  @param w0, w1 the weights applied to p0 and p1
 *  @param rnd    term added after the shift
 *  @param shift  right shift applied to the weighted sum
******************************************************************************
*/
#define HME_WT_PRED(p0, p1, w0, w1, rnd, shift) \
    (((((S32)(w0)) * ((S32)(p0)) + ((S32)(w1)) * ((S32)(p1))) >> (shift)) + (rnd))
499 
/**
******************************************************************************
 *  @macro PREFETCH_BLK
 *  @brief Prefetches a block of data into cache beforehand
 *
 *  Issues one PREFETCH per line, for `lines` lines, starting at pu1_src and
 *  stepping by src_stride after every line.
 *
 *  @param pu1_src    modifiable pointer lvalue to the first line; it is
 *                    advanced in place and ends up lines * src_stride further
 *  @param src_stride step between consecutive lines, in elements of pu1_src
 *  @param lines      number of lines to prefetch
 *  @param type       forwarded unchanged as the second argument of PREFETCH
 *
 *  @note Expands to a brace-enclosed block, not an expression.
******************************************************************************
*/
#define PREFETCH_BLK(pu1_src, src_stride, lines, type) \
    { \
        WORD32 i4_prefetch_line; \
        for(i4_prefetch_line = 0; i4_prefetch_line < (lines); i4_prefetch_line++) \
        { \
            PREFETCH((char const *)(pu1_src), type); \
            (pu1_src) += (src_stride); \
        } \
    }
523 
/**
******************************************************************************
 *  @macro INSERT_UNIQUE_NODE
 *  @brief Appends new_node to as_nodes unless an equivalent node is already
 *         present; num_nodes is incremented on insertion.
 *
 *  The candidate mv is read through (new_node).ps_mv. On insertion it is
 *  first copied into (new_node).s_mv, so the stored node carries the mv by
 *  value.
 *
 *  Two duplicate checks exist:
 *  - Bitmap check, used when use_hashing is non-zero and the mv offset from
 *    (center_x, center_y) lies in [-MAP_X_MAX, MAP_X_MAX) horizontally and
 *    [-MAP_Y_MAX, MAP_Y_MAX) vertically. au4_map holds one word per x offset
 *    and one bit per y offset. The reference index is not part of this check.
 *  - Linear search otherwise, comparing mv x, mv y and i1_ref_idx against
 *    every node already in the list.
 *
 *  @param as_nodes    array of nodes receiving the candidate
 *  @param num_nodes   modifiable count of valid entries in as_nodes
 *  @param new_node    candidate node (struct lvalue, not a pointer)
 *  @param au4_map     bitmap of mv offsets already inserted
 *  @param center_x    x coordinate the bitmap is centred on
 *  @param center_y    y coordinate the bitmap is centred on
 *  @param use_hashing non-zero to allow the bitmap check
 *
 *  @note No capacity check is made on as_nodes.
******************************************************************************
*/
#define INSERT_UNIQUE_NODE( \
    as_nodes, num_nodes, new_node, au4_map, center_x, center_y, use_hashing) \
    { \
        WORD32 k; \
        UWORD32 map; \
        WORD32 delta_x, delta_y; \
        \
        map = 0; \
        delta_x = (new_node).ps_mv->i2_mvx - (center_x); \
        delta_y = (new_node).ps_mv->i2_mvy - (center_y); \
        \
        if(!(use_hashing) || (delta_x < (-MAP_X_MAX)) || (delta_x >= MAP_X_MAX) || \
           (delta_y < (-MAP_Y_MAX)) || (delta_y >= MAP_Y_MAX)) \
        { \
            /* Bitmap not usable: walk the list until a duplicate is found */ \
            k = 0; \
            while((k < (num_nodes)) && \
                  !(((as_nodes)[k].s_mv.i2_mvx == (new_node).ps_mv->i2_mvx) && \
                    ((as_nodes)[k].s_mv.i2_mvy == (new_node).ps_mv->i2_mvy) && \
                    ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx))) \
            { \
                k++; \
            } \
            \
            /* Reaching the end of the list means no duplicate exists */ \
            if(k == (num_nodes)) \
            { \
                (new_node).s_mv = *(new_node).ps_mv; \
                (as_nodes)[k] = (new_node); \
                (num_nodes)++; \
            } \
        } \
        else \
        { \
            /* One word per x offset, one bit per y offset */ \
            map = (au4_map)[delta_x + MAP_X_MAX]; \
            if((map & (1U << (delta_y + MAP_Y_MAX))) == 0) \
            { \
                (new_node).s_mv = *(new_node).ps_mv; \
                (as_nodes)[(num_nodes)] = (new_node); \
                (num_nodes)++; \
                (au4_map)[delta_x + MAP_X_MAX] = map | (1U << (delta_y + MAP_Y_MAX)); \
            } \
        } \
    }
569 
/**
******************************************************************************
 *  @macro INSERT_NEW_NODE_NOMAP
 *  @brief Inserts a new search node in a list if it is unique; helps in
 *         removing duplicate nodes/candidates. Uses a linear search only.
 *
 *  A node counts as a duplicate when its s_mv matches an existing entry. When
 *  implicit_layer is non-zero the i1_ref_idx must match as well. A unique
 *  node is copied to as_nodes[num_nodes] and num_nodes is incremented.
 *
 *  @param as_nodes       array of nodes receiving the candidate
 *  @param num_nodes      modifiable count of valid entries in as_nodes
 *  @param new_node       candidate node (struct value, not a pointer)
 *  @param implicit_layer non-zero to include i1_ref_idx in the comparison
 *
 *  @note No capacity check is made on as_nodes.
******************************************************************************
*/
#define INSERT_NEW_NODE_NOMAP(as_nodes, num_nodes, new_node, implicit_layer) \
    { \
        WORD32 k; \
        if(!(implicit_layer)) \
        { \
            for(k = 0; k < (num_nodes); k++) \
            { \
                /* Search if this node is already present in unique list */ \
                if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
                   ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy)) \
                { \
                    /* This is duplicate node; need not be inserted */ \
                    break; \
                } \
            } \
        } \
        else \
        { \
            for(k = 0; k < (num_nodes); k++) \
            { \
                /* Search if this node is already present in unique list */ \
                if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
                   ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) && \
                   ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
                { \
                    /* This is duplicate node; need not be inserted */ \
                    break; \
                } \
            } \
        } \
        \
        if(k == (num_nodes)) \
        { \
            /* Insert new node only if it is not duplicate node */ \
            (as_nodes)[k] = (new_node); \
            (num_nodes)++; \
        } \
    }
/**
******************************************************************************
 *  @macro INSERT_NEW_NODE_NOMAP_ALTERNATE
 *  @brief Inserts a new search node in a list if it is unique; helps in
 *         removing duplicate nodes/candidates. The candidate is read from
 *         per-result, per-partition arrays instead of a ready made node.
 *
 *  The partition column used for the lookup is part_id itself, unless
 *  i4_num_valid_parts exceeds 8, in which case it is ai4_part_id[part_id].
 *  A candidate is a duplicate when mv x, mv y and reference index all match
 *  an existing entry. For a unique candidate the total cost, mv cost, mv,
 *  reference index and partition id are written to as_nodes[num_nodes] and
 *  num_nodes is incremented. u1_part_id is always taken from
 *  ai4_part_id[part_id].
 *
 *  @param as_nodes   array of nodes receiving the candidate
 *  @param num_nodes  modifiable count of valid entries in as_nodes
 *  @param new_node   pointer to the structure holding the result arrays
 *  @param result_num first index into the result arrays
 *  @param part_id    index of the partition within ai4_part_id
 *
 *  @note No capacity check is made on as_nodes.
******************************************************************************
*/
#define INSERT_NEW_NODE_NOMAP_ALTERNATE(as_nodes, num_nodes, new_node, result_num, part_id) \
    { \
        WORD32 k; \
        WORD32 part_id_1 = ((new_node)->i4_num_valid_parts > 8) \
                               ? (new_node)->ai4_part_id[part_id] \
                               : (part_id); \
        for(k = 0; k < (num_nodes); k++) \
        { \
            /* Search if this node is already present in unique list */ \
            if(((as_nodes)[k].s_mv.i2_mvx == (new_node)->i2_mv_x[result_num][part_id_1]) && \
               ((as_nodes)[k].s_mv.i2_mvy == (new_node)->i2_mv_y[result_num][part_id_1]) && \
               ((as_nodes)[k].i1_ref_idx == (new_node)->i2_ref_idx[result_num][part_id_1])) \
            { \
                /* This is duplicate node; need not be inserted */ \
                break; \
            } \
        } \
        \
        if(k == (num_nodes)) \
        { \
            /* Insert new node only if it is not duplicate node */ \
            (as_nodes)[k].i4_tot_cost = (WORD32)(new_node)->i2_tot_cost[result_num][part_id_1]; \
            (as_nodes)[k].i4_mv_cost = (WORD32)(new_node)->i2_mv_cost[result_num][part_id_1]; \
            (as_nodes)[k].s_mv.i2_mvx = (new_node)->i2_mv_x[result_num][part_id_1]; \
            (as_nodes)[k].s_mv.i2_mvy = (new_node)->i2_mv_y[result_num][part_id_1]; \
            (as_nodes)[k].i1_ref_idx = (WORD8)(new_node)->i2_ref_idx[result_num][part_id_1]; \
            (as_nodes)[k].u1_part_id = (new_node)->ai4_part_id[part_id]; \
            (num_nodes)++; \
        } \
    }
651 
/**
******************************************************************************
 *  @macro INSERT_NEW_NODE
 *  @brief Inserts a new search node in a list if it is unique; helps in
 *         removing duplicate nodes/candidates.
 *
 *  Two duplicate checks exist:
 *  - Bitmap check, used when use_hashing is non-zero and the offset of
 *    (new_node).s_mv from (center_x, center_y) lies in
 *    [-MAP_X_MAX, MAP_X_MAX) horizontally and [-MAP_Y_MAX, MAP_Y_MAX)
 *    vertically. au4_map holds one word per x offset and one bit per y
 *    offset. The reference index is not part of this check.
 *  - Linear search otherwise, comparing s_mv against every listed node and,
 *    when implicit_layer is non-zero, i1_ref_idx as well.
 *
 *  A unique node is copied to as_nodes[num_nodes] and num_nodes is
 *  incremented.
 *
 *  @param as_nodes       array of nodes receiving the candidate
 *  @param num_nodes      modifiable count of valid entries in as_nodes
 *  @param new_node       candidate node (struct value, not a pointer)
 *  @param implicit_layer non-zero to include i1_ref_idx in the linear search
 *  @param au4_map        bitmap of mv offsets already inserted
 *  @param center_x       x coordinate the bitmap is centred on
 *  @param center_y       y coordinate the bitmap is centred on
 *  @param use_hashing    non-zero to allow the bitmap check
 *
 *  @note No capacity check is made on as_nodes. Arguments are expanded more
 *        than once and must not have side effects.
******************************************************************************
*/
#define INSERT_NEW_NODE( \
    as_nodes, num_nodes, new_node, implicit_layer, au4_map, center_x, center_y, use_hashing) \
    { \
        WORD32 k; \
        UWORD32 map; \
        WORD32 delta_x, delta_y; \
        delta_x = (new_node).s_mv.i2_mvx - (center_x); \
        delta_y = (new_node).s_mv.i2_mvy - (center_y); \
        map = 0; \
        if((delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && (delta_y < MAP_Y_MAX) && \
           (delta_y >= (-MAP_Y_MAX)) && (use_hashing)) \
        { \
            /* One word per x offset, one bit per y offset */ \
            map = (au4_map)[delta_x + MAP_X_MAX]; \
            if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
            { \
                (as_nodes)[(num_nodes)] = (new_node); \
                (num_nodes)++; \
                map |= 1U << (delta_y + MAP_Y_MAX); \
                (au4_map)[delta_x + MAP_X_MAX] = map; \
            } \
        } \
        else \
        { \
            for(k = 0; k < (num_nodes); k++) \
            { \
                /* Search if this node is already present in unique list. */ \
                /* The ref idx only takes part for implicit layers        */ \
                if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
                   ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) && \
                   (!(implicit_layer) || \
                    ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx))) \
                { \
                    /* This is duplicate node; need not be inserted */ \
                    break; \
                } \
            } \
            if(k == (num_nodes)) \
            { \
                /* Insert new node only if it is not duplicate node */ \
                (as_nodes)[k] = (new_node); \
                (num_nodes)++; \
            } \
        } \
    }
713 
/**
******************************************************************************
 *  @macro COMPUTE_DIFF_MV
 *  @brief Computes the difference between the mv stored by value in a node
 *         (s_mv) and a predictor, after scaling each to a common resolution:
 *         mvd = (node mv * 2^inp_sh) - (predictor * 2^pred_sh)
 *
 *  @param mvdx, mvdy     lvalues receiving the x and y difference
 *  @param inp_node       pointer to the node; (inp_node)->s_mv is read
 *  @param mv_p_x, mv_p_y predictor mv components
 *  @param inp_sh         scale (as a power of two) applied to the node mv
 *  @param pred_sh        scale (as a power of two) applied to the predictor
 *
 *  @note Scaling multiplies by (1 << sh) instead of shifting the mv, because
 *        left shifting a negative value is undefined behaviour in C.
******************************************************************************
*/
#define COMPUTE_DIFF_MV(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
    { \
        (mvdx) = (inp_node)->s_mv.i2_mvx * (1 << (inp_sh)); \
        (mvdy) = (inp_node)->s_mv.i2_mvy * (1 << (inp_sh)); \
        (mvdx) -= ((mv_p_x) * (1 << (pred_sh))); \
        (mvdy) -= ((mv_p_y) * (1 << (pred_sh))); \
    }
721 
/**
******************************************************************************
 *  @macro COMPUTE_MV_DIFFERENCE
 *  @brief Computes the difference between the mv a node points to (ps_mv)
 *         and a predictor, after scaling each to a common resolution:
 *         mvd = (node mv * 2^inp_sh) - (predictor * 2^pred_sh)
 *
 *  @param mvdx, mvdy     lvalues receiving the x and y difference
 *  @param inp_node       pointer to the node; (inp_node)->ps_mv is read
 *  @param mv_p_x, mv_p_y predictor mv components
 *  @param inp_sh         scale (as a power of two) applied to the node mv
 *  @param pred_sh        scale (as a power of two) applied to the predictor
 *
 *  @note Scaling multiplies by (1 << sh) instead of shifting the mv, because
 *        left shifting a negative value is undefined behaviour in C.
******************************************************************************
*/
#define COMPUTE_MV_DIFFERENCE(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
    { \
        (mvdx) = (inp_node)->ps_mv->i2_mvx * (1 << (inp_sh)); \
        (mvdy) = (inp_node)->ps_mv->i2_mvy * (1 << (inp_sh)); \
        (mvdx) -= ((mv_p_x) * (1 << (pred_sh))); \
        (mvdy) -= ((mv_p_y) * (1 << (pred_sh))); \
    }
729 
/**
******************************************************************************
 *  @enum  CU_MERGE_RESULT_T
 *  @brief Outcome of a CU merge attempt: merged, or left split
******************************************************************************
*/
typedef enum
{
    /** Merge was successful */
    CU_MERGED = 0,

    /** Merge was not successful */
    CU_SPLIT = 1
} CU_MERGE_RESULT_T;
741 
/**
******************************************************************************
 *  @enum  PART_ORIENT_T
 *  @brief Orientation of a partition: vertical (left / right half) or
 *         horizontal (top / bottom half)
******************************************************************************
*/
typedef enum
{
    VERT_LEFT = 0,
    VERT_RIGHT = 1,
    HORZ_TOP = 2,
    HORZ_BOT = 3
} PART_ORIENT_T;
755 
/**
******************************************************************************
 *  @enum  GRID_PT_T
 *  @brief For a 3x3 rect grid, numbers each pt as shown
 *     5   2   6
 *     1   0   3
 *     7   4   8
 *  The centre comes first, then the four edge neighbours, then the corners.
******************************************************************************
*/
typedef enum
{
    PT_C = 0, /* centre       */
    PT_L = 1, /* left         */
    PT_T = 2, /* top          */
    PT_R = 3, /* right        */
    PT_B = 4, /* bottom       */
    PT_TL = 5, /* top left     */
    PT_TR = 6, /* top right    */
    PT_BL = 7, /* bottom left  */
    PT_BR = 8, /* bottom right */
    NUM_GRID_PTS = 9 /* number of points in the grid */
} GRID_PT_T;
778 
/**
******************************************************************************
 *  @macro  IS_POW_2
 *  @brief Returns whether a number is a power of 2: evaluates to 1 when x
 *         has at most one bit set, 0 otherwise
 *  @note  Also evaluates to 1 for x == 0. x is expanded twice.
******************************************************************************
*/
#define IS_POW_2(x) (((x) & ((x)-1)) == 0)
786 
/**
******************************************************************************
 *  @macro  GRID_ALL_PTS_VALID
 *  @brief For a 3x3 rect grid, this can be used to enable all pts in grid
 *         (mask with the 9 least significant bits set)
******************************************************************************
*/
#define GRID_ALL_PTS_VALID 0x1FF
794 
/**
******************************************************************************
 *  @macro  GRID_DIAMOND_ENABLE_ALL
 *  @brief Mask for a diamond search: combines BIT_EN() of the centre and of
 *         its left, top, right and bottom neighbours (5 pts in total)
******************************************************************************
*/
#define GRID_DIAMOND_ENABLE_ALL \
    (BIT_EN(PT_C) |             \
     BIT_EN(PT_L) |             \
     BIT_EN(PT_T) |             \
     BIT_EN(PT_R) |             \
     BIT_EN(PT_B))
803 
/**
******************************************************************************
 *  @macro  GRID_RT_3_INVALID, GRID_LT_3_INVALID,GRID_TOP_3_INVALID,GRID_BOT_3_INVALID
 *  @brief For a square grid search, depending on where the best result is
 *  we can optimise search for next iteration by invalidating some pts.
 *  Each mask is GRID_ALL_PTS_VALID with the BIT_EN() bits of the three pts
 *  on one side of the grid (right, left, top or bottom) toggled.
******************************************************************************
*/
#define GRID_RT_3_INVALID \
    ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_R) | BIT_EN(PT_TR) | BIT_EN(PT_BR)))
#define GRID_LT_3_INVALID \
    ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_L) | BIT_EN(PT_TL) | BIT_EN(PT_BL)))
#define GRID_TOP_3_INVALID \
    ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_T) | BIT_EN(PT_TL) | BIT_EN(PT_TR)))
#define GRID_BOT_3_INVALID \
    ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_B) | BIT_EN(PT_BL) | BIT_EN(PT_BR)))
815 
/**
******************************************************************************
 *  @enum  GMV_MVTYPE_T
 *  @brief Defines what type of GMV we need: thin lobe for a very spiky
 *  distribution of mvs, or thick lobe for a blurred distribution of mvs
******************************************************************************
*/
typedef enum
{
    /** Blurred mv distribution */
    GMV_THICK_LOBE = 0,

    /** Very spiky mv distribution */
    GMV_THIN_LOBE = 1,

    /** Number of lobe types */
    NUM_GMV_LOBES = 2
} GMV_MVTYPE_T;
829 
/**
******************************************************************************
 *  @enum  BLK_SIZE_T
 *  @brief Defines all possible inter blks possible. Each name carries the
 *  two block dimensions; entries are grouped below by their second dimension.
******************************************************************************
*/
typedef enum
{
    BLK_INVALID = -1,

    /* Second dimension up to 8 */
    BLK_4x4 = 0,
    BLK_4x8 = 1,
    BLK_8x4 = 2,
    BLK_8x8 = 3,

    /* Second dimension up to 16 */
    BLK_4x16 = 4,
    BLK_8x16 = 5,
    BLK_12x16 = 6,
    BLK_16x4 = 7,
    BLK_16x8 = 8,
    BLK_16x12 = 9,
    BLK_16x16 = 10,

    /* Second dimension up to 32 */
    BLK_8x32 = 11,
    BLK_16x32 = 12,
    BLK_24x32 = 13,
    BLK_32x8 = 14,
    BLK_32x16 = 15,
    BLK_32x24 = 16,
    BLK_32x32 = 17,

    /* Second dimension up to 64 */
    BLK_16x64 = 18,
    BLK_32x64 = 19,
    BLK_48x64 = 20,
    BLK_64x16 = 21,
    BLK_64x32 = 22,
    BLK_64x48 = 23,
    BLK_64x64 = 24,

    /* Number of valid block sizes (BLK_INVALID excluded) */
    NUM_BLK_SIZES = 25
} BLK_SIZE_T;
866 
/**
******************************************************************************
 *  @enum  SEARCH_COMPLEXITY_T
 *  @brief For refinement layer, this decides the number of refinement candts
******************************************************************************
*/
typedef enum
{
    SEARCH_CX_LOW = 0, /* low complexity              */
    SEARCH_CX_MED, /* medium complexity, value 1  */
    SEARCH_CX_HIGH /* high complexity, value 2    */
} SEARCH_COMPLEXITY_T;
879 
/**
******************************************************************************
 *  @enum  CTB_BOUNDARY_TYPES_T
 *  @brief For pictures that are not a multiple of the CTB size horizontally
 *  or vertically, we define 4 unique cases: centre (full ctbs), bottom
 *  boundary (64x8k CTBs), right boundary (8mx64 CTBs), and bottom right
 *  corner (8mx8k CTB)
******************************************************************************
*/
typedef enum
{
    /** Full CTB */
    CTB_CENTRE = 0,

    /** CTB on the bottom picture boundary */
    CTB_BOT_PIC_BOUNDARY = 1,

    /** CTB on the right picture boundary */
    CTB_RT_PIC_BOUNDARY = 2,

    /** CTB in the bottom right picture corner */
    CTB_BOT_RT_PIC_BOUNDARY = 3,

    /** Number of boundary cases */
    NUM_CTB_BOUNDARY_TYPES = 4
} CTB_BOUNDARY_TYPES_T;
896 
/**
******************************************************************************
 *  @enum  SEARCH_CANDIDATE_TYPE_T
 *  @brief Monikers for all sorts of search candidates used in ME. The first
 *  enumerator of every group has its value spelled out; the others follow on
 *  consecutively.
******************************************************************************
*/
typedef enum
{
    ILLUSORY_CANDIDATE = -1,

    /* Zero mv candidates */
    ZERO_MV = 0,
    ZERO_MV_ALTREF,

    /* Spatial candidates */
    SPATIAL_LEFT0 = 2,
    SPATIAL_TOP0,
    SPATIAL_TOP_RIGHT0,
    SPATIAL_TOP_LEFT0,
    SPATIAL_LEFT1,
    SPATIAL_TOP1,
    SPATIAL_TOP_RIGHT1,
    SPATIAL_TOP_LEFT1,

    /* Projected colocated candidates */
    PROJECTED_COLOC0 = 10,
    PROJECTED_COLOC1,
    PROJECTED_COLOC2,
    PROJECTED_COLOC3,
    PROJECTED_COLOC4,
    PROJECTED_COLOC5,
    PROJECTED_COLOC6,
    PROJECTED_COLOC7,
    PROJECTED_COLOC_TR0 = 18,
    PROJECTED_COLOC_TR1,
    PROJECTED_COLOC_BL0,
    PROJECTED_COLOC_BL1,
    PROJECTED_COLOC_BR0,
    PROJECTED_COLOC_BR1,

    /* Projected neighbour candidates, indices 0 and 1 */
    PROJECTED_TOP0 = 24,
    PROJECTED_TOP1,
    PROJECTED_TOP_RIGHT0,
    PROJECTED_TOP_RIGHT1,
    PROJECTED_TOP_LEFT0,
    PROJECTED_TOP_LEFT1,
    PROJECTED_RIGHT0,
    PROJECTED_RIGHT1,
    PROJECTED_BOTTOM0,
    PROJECTED_BOTTOM1,
    PROJECTED_BOTTOM_RIGHT0,
    PROJECTED_BOTTOM_RIGHT1,
    PROJECTED_BOTTOM_LEFT0,
    PROJECTED_BOTTOM_LEFT1,

    /* Colocated global mv candidates */
    COLOCATED_GLOBAL_MV0 = 38,
    COLOCATED_GLOBAL_MV1,

    /* Projected neighbour candidates, indices 2 and 3 */
    PROJECTED_TOP2 = 40,
    PROJECTED_TOP3,
    PROJECTED_TOP_RIGHT2,
    PROJECTED_TOP_RIGHT3,
    PROJECTED_TOP_LEFT2,
    PROJECTED_TOP_LEFT3,
    PROJECTED_RIGHT2,
    PROJECTED_RIGHT3,
    PROJECTED_BOTTOM2,
    PROJECTED_BOTTOM3,
    PROJECTED_BOTTOM_RIGHT2,
    PROJECTED_BOTTOM_RIGHT3,
    PROJECTED_BOTTOM_LEFT2,
    PROJECTED_BOTTOM_LEFT3,

    /* Number of candidate types (ILLUSORY_CANDIDATE excluded) */
    NUM_SEARCH_CAND_TYPES = 54
} SEARCH_CANDIDATE_TYPE_T;
962 
/**
******************************************************************************
 *  @enum  SEARCH_CAND_LOCATIONS_T
 *  @brief Names the locations a search candidate can be taken from: the
 *  colocated position and its 4x4 variants, followed by the eight
 *  neighbouring positions
******************************************************************************
*/
typedef enum
{
    ILLUSORY_LOCATION = -1,

    /* Colocated positions */
    COLOCATED = 0,
    COLOCATED_4x4_TR = 1,
    COLOCATED_4x4_BL = 2,
    COLOCATED_4x4_BR = 3,

    /* Neighbouring positions */
    LEFT = 4,
    TOPLEFT = 5,
    TOP = 6,
    TOPRIGHT = 7,
    RIGHT = 8,
    BOTTOMRIGHT = 9,
    BOTTOM = 10,
    BOTTOMLEFT = 11,

    /* Number of locations (ILLUSORY_LOCATION excluded) */
    NUM_SEARCH_CAND_LOCATIONS = 12
} SEARCH_CAND_LOCATIONS_T;
980 
/**
******************************************************************************
 *  @macros  ENABLE_mxn
 *  @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS enables
 *  all partitions, while the others enable selected partitions. These can be
 *  used to set the mask of active partitions. Every mask is an OR of
 *  BIT_EN(PART_ID_x) terms.
******************************************************************************
*/

/* Masks for a single partition type */
#define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N))
#define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B))
#define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R))
#define ENABLE_NxN           \
    (BIT_EN(PART_ID_NxN_TL) | \
     BIT_EN(PART_ID_NxN_TR) | \
     BIT_EN(PART_ID_NxN_BL) | \
     BIT_EN(PART_ID_NxN_BR))
#define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B))
#define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B))
#define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R))
#define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R))

/* Masks for groups of partition types */
#define ENABLE_AMP (ENABLE_2NxnU | ENABLE_2NxnD | ENABLE_nLx2N | ENABLE_nRx2N)
#define ENABLE_SMP (ENABLE_2NxN | ENABLE_Nx2N)
#define ENABLE_SQUARE_PARTS (ENABLE_2Nx2N | ENABLE_NxN)
#define ENABLE_ALL_PARTS \
    (ENABLE_2Nx2N | ENABLE_NxN | ENABLE_2NxN | ENABLE_Nx2N | ENABLE_AMP)
1004 
1005 /**
1006 ******************************************************************************
1007  *  @enum  MV_PEL_RES_T
1008  *  @brief Resolution of MV fpel/hpel/qpel units. Useful for maintaining
1009  *  predictors. During fpel search, candts, predictors etc are in fpel units,
1010  *  in subpel search, they are in subpel units
1011 ******************************************************************************
1012 */
1013 typedef enum
1014 {
1015     MV_RES_FPEL,
1016     MV_RES_HPEL,
1017     MV_RES_QPEL
1018 } MV_PEL_RES_T;
1019 
1020 /**
1021 ******************************************************************************
1022  *  @enum  HME_SET_MVPRED_RES
1023  *  @brief Sets resolution for predictor bank (fpel/qpel/hpel units)
1024 ******************************************************************************
1025 */
1026 #define HME_SET_MVPRED_RES(ps_pred_ctxt, mv_pel_res) ((ps_pred_ctxt)->mv_pel = mv_pel_res)
1027 
1028 /**
1029 ******************************************************************************
1030  *  @enum  HME_SET_MVPRED_DIR
1031  *  @brief Sets the direction, meaning L0/L1. Since L0 and L1 use separate
1032  *  candts, the pred ctxt for them hasto be maintained separately
1033 ******************************************************************************
1034 */
1035 #define HME_SET_MVPRED_DIR(ps_pred_ctxt, pred_lx) ((ps_pred_ctxt)->pred_lx = pred_lx)
1036 
1037 /**
1038 ******************************************************************************
1039  *  @brief macros to clip / check mv within specified range
1040 ******************************************************************************
1041  */
/*
 * Non-zero only when (x, y) lies strictly between the min and max limits of
 * the range; a value equal to a limit is treated as out of range.
 * The arguments are expanded more than once, so they should be free of
 * side effects.
 */
#define CHECK_MV_WITHIN_RANGE(x, y, range)                                                         \
    (((range)->i2_min_x < (x)) && ((range)->i2_max_x > (x)) &&                                     \
     ((range)->i2_min_y < (y)) && ((range)->i2_max_y > (y)))
1045 
/*
 * Multiplies the four MV limits of the range by 4 in place, i.e. rescales
 * them from full-pel to quarter-pel units.
 *
 * A multiplication is used instead of "<<= 2" because a limit may be
 * negative, and left-shifting a negative value is undefined behaviour in C.
 * For non-negative limits the result is the same as the shift.
 *
 * The macro still expands to a brace block (not do/while(0)) so that every
 * existing call site, with or without a trailing semicolon, keeps compiling.
 * The range argument is expanded four times.
 */
#define CONVERT_MV_LIMIT_TO_QPEL(range)                                                            \
    {                                                                                              \
        (range)->i2_max_x *= 4;                                                                    \
        (range)->i2_max_y *= 4;                                                                    \
        (range)->i2_min_x *= 4;                                                                    \
        (range)->i2_min_y *= 4;                                                                    \
    }
1053 
/*
 * Right-shifts the four MV limits of the range by 2 in place, i.e. rescales
 * them from quarter-pel to full-pel units.
 * Expands to a brace block; the range argument is expanded four times.
 */
#define CONVERT_MV_LIMIT_TO_FPEL(range)                                                            \
    {                                                                                              \
        (range)->i2_min_x >>= 2;                                                                   \
        (range)->i2_min_y >>= 2;                                                                   \
        (range)->i2_max_x >>= 2;                                                                   \
        (range)->i2_max_y >>= 2;                                                                   \
    }
1061 
1062 /**
1063 ******************************************************************************
1064  *  @brief Swicth to debug the number of subpel search nodes
1065 ******************************************************************************
1066 */
1067 #define DEBUG_SUBPEL_SEARCH_NODE_HS_COUNT 0
1068 
1069 /**
1070 ******************************************************************************
1071  *  @typedef  SAD_GRID_T
1072  *  @brief Defines a 2D array type used to store SADs across grid and across
1073  * partition types
1074 ******************************************************************************
1075 */
1076 typedef S32 SAD_GRID_T[9][MAX_NUM_PARTS];
1077 
1078 /*****************************************************************************/
1079 /* Structures                                                                */
1080 /*****************************************************************************/
1081 
1082 /**
1083 ******************************************************************************
1084  *  @struct  grid_node_t
1085  *  @brief stores a complete info for a candt
1086 ******************************************************************************
1087 */
1088 typedef struct
1089 {
1090     S16 i2_mv_x;
1091     S16 i2_mv_y;
1092     S08 i1_ref_idx;
1093 } grid_node_t;
1094 
1095 /**
1096 ******************************************************************************
1097  *  @struct  search_node_t
1098  *  @brief   Basic structure used for storage of search results, specification
1099  *  of init candidates for search etc. This structure is complete for
1100  *  specification of mv and cost for a given direction of search (L0/L1) but
1101  *  does not carry information of what type of partition it represents.
1102 ******************************************************************************
1103  */
1104 typedef struct
1105 {
1106     /** Motion vector */
1107     mv_t s_mv;
1108 
1109     /** Used in the hme_mv_clipper function to reduce loads and stores */
1110     mv_t *ps_mv;
1111 
1112     /** Ref id, as specified in terms of Lc, unified list */
1113     S08 i1_ref_idx;
1114 
1115     /** Flag to indicate whether mv is in fpel or QPEL units */
1116     U08 u1_subpel_done;
1117 
1118     /**
1119      * Indicates whether this node constitutes a valid predictor candt.
1120      * Since this structure also used for predictor candts, some candts may
1121      * not be available (anti causal or outside pic boundary). Availabilit
1122      * can be inferred using this flag.
1123      */
1124     U08 u1_is_avail;
1125 
1126     /**
1127      * Indicates partition Id to which this node belongs. Useful during
1128      * subpel / fullpel refinement search to identify partition whose
1129      * cost needs to be minimized
1130      */
1131     U08 u1_part_id;
1132 
1133     /** SAD / SATD stored here */
1134     S32 i4_sad;
1135 
1136     /**
1137      * Cost related to coding MV, multiplied by lambda
1138      * TODO : Entry may be redundant, can be removed
1139      */
1140     S32 i4_mv_cost;
1141 
1142     /** Total cost, (SAD + MV Cost) */
1143     S32 i4_tot_cost;
1144 
1145     /** Subpel_Dist_Improvement.
1146         It is the reduction in distortion (SAD or SATD) achieved
1147         from the full-pel stage to the sub-pel stage
1148     */
1149     S32 i4_sdi;
1150 
1151 } search_node_t;
1152 
1153 /**
1154 ******************************************************************************
1155  *  @macro  INIT_SEARCH_NODE
1156  *  @brief   Initializes this search_node_t structure. Can be used to zero
1157  *          out candts, set max costs in results etc
1158 ******************************************************************************
1159  */
1160 #define INIT_SEARCH_NODE(x, a)                                                                     \
1161     {                                                                                              \
1162         (x)->s_mv.i2_mvx = 0;                                                                      \
1163         (x)->s_mv.i2_mvy = 0;                                                                      \
1164         (x)->i1_ref_idx = a;                                                                       \
1165         (x)->i4_tot_cost = MAX_32BIT_VAL;                                                          \
1166         (x)->i4_sad = MAX_32BIT_VAL;                                                               \
1167         (x)->u1_subpel_done = 0;                                                                   \
1168         (x)->u1_is_avail = 1;                                                                      \
1169     }
1170 
1171 /**
1172 ******************************************************************************
1173  *  @struct  part_attr_t
1174  *  @brief   Geometric description of a partition w.r.t. CU start. Note that
1175  *           since this is used across various CU sizes, the inference of
1176  *           these members is to be done in the context of specific usage
1177 ******************************************************************************
1178  */
1179 typedef struct
1180 {
1181     /** Start of partition w.r.t. CU start in x dirn */
1182     U08 u1_x_start;
1183     /** Size of partitino w.r.t. CU start in x dirn */
1184     U08 u1_x_count;
1185     /** Start of partition w.r.t. CU start in y dirn */
1186     U08 u1_y_start;
1187     /** Size of partitino w.r.t. CU start in y dirn */
1188     U08 u1_y_count;
1189 } part_attr_t;
1190 
1191 /**
1192 ******************************************************************************
1193  *  @struct  search_candt_t
1194  *  @brief   Complete information for a given candt in any refinement srch
1195 ******************************************************************************
1196  */
1197 typedef struct
1198 {
1199     /** Points to the mv, ref id info. */
1200     search_node_t *ps_search_node;
1201     /** Number of refinemnts to be done for this candt */
1202     U08 u1_num_steps_refine;
1203 } search_candt_t;
1204 
1205 /**
1206 ******************************************************************************
1207  *  @struct  result_node_t
1208  *  @brief   Contains complete search result for a CU for a given type of
1209  *           partition split. Holds ptrs to results for each partition, with
1210  *           information of partition type.
1211 ******************************************************************************
1212  */
1213 typedef struct
1214 {
1215     /**
1216      * Type of partition that the CU is split into, for which this
1217      * result is relevant
1218      */
1219     PART_TYPE_T e_part_type;
1220 
1221     /**
1222      * Total cost of coding the CU (sum of costs of individual partitions
1223      * plus other possible CU level overheads)
1224      */
1225     S32 i4_tot_cost;
1226 
1227     /**
1228      * Pointer to results of each individual partitions. Note that max
1229      * number of partitions a CU can be split into is MAX_NUM_PARTS
1230      */
1231     search_node_t *ps_part_result[MAX_NUM_PARTS];
1232 
1233     /* TU split flag : tu_split_flag[0] represents the transform splits
1234      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
1235      *  to respective 32x32  */
1236     S32 ai4_tu_split_flag[4];
1237 
1238 } result_node_t;
1239 
1240 /**
1241 ******************************************************************************
1242  *  @struct  ctb_node_t
1243  *  @brief   Finalized information for a given CU or CTB. This is a recursive
1244  *           structure and can hence start at CTB level, recursing for every
1245  *           level of split till we hit leaf CUs in the CTB. At leaf node
1246  *           it contains info for coded non split CU, with child nodes being
1247  *           set to NULL
1248 ******************************************************************************
1249  */
1250 typedef struct ctb_node_t
1251 {
1252     /** x offset of this CU w.r.t. CTB start (0-63) */
1253     U08 u1_x_off;
1254     /** y offset of this C U w.r.t. CTB start (0-63) */
1255     U08 u1_y_off;
1256     /** Results of each partition in both directions L0,L1 */
1257     search_node_t as_part_results[MAX_NUM_PARTS][2];
1258     /**
1259      * Pointers to pred buffers. Note that the buffer may be allocated
1260      * at parent level or at this level
1261      */
1262     U08 *apu1_pred[2];
1263     /** Prediction direction for each partition: 0-L0, 1-L1, 2-BI */
1264     U08 u1_pred_dir[MAX_NUM_PARTS];
1265     /**
1266      * When pred direction is decided to be BI, we still store the best
1267      * uni pred dir (L0/L1) in this array, for RD Opt purposes
1268      */
1269     U08 u1_best_uni_dir[MAX_NUM_PARTS];
1270     /** Stride of pred buffer pointed to by apu1_pred member */
1271     S32 i4_pred_stride;
1272     /** Size of the CU that this node represents */
1273     CU_SIZE_T e_cu_size;
1274     /** For leaf CUs, this indicats type of partition (for e.g. PRT_2NxN) */
1275     PART_TYPE_T e_part_type;
1276     /** Below entries are for a CU level*/
1277     S32 i4_sad;
1278     S32 i4_satd;
1279     S32 i4_mv_cost;
1280     S32 i4_rate;
1281     S32 i4_dist;
1282     S32 i4_tot_cost;
1283     /** Best costs of each partitions, if partition is BI, then best cost across uni/bi */
1284     S32 ai4_part_costs[4];
1285 
1286     /* TU split flag : tu_split_flag[0] represents the transform splits
1287      *  for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
1288      *  to respective 32x32  */
1289     /* For a 8x8 TU - 1 bit used to indicate split */
1290     /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
1291     /* For a 32x32 TU - See above */
1292     S32 ai4_tu_split_flag[4];
1293 
1294     /**
1295      * pointers to child nodes. If this node is split, then the below point
1296      * to children nodes (TL, TR, BL, BR) each of quarter size (w/2, h/2)
1297      * If this node not split, then below point to null
1298      */
1299     struct ctb_node_t *ps_tl;
1300     struct ctb_node_t *ps_tr;
1301     struct ctb_node_t *ps_bl;
1302     struct ctb_node_t *ps_br;
1303 } ctb_node_t;
1304 
1305 /**
1306 ******************************************************************************
1307  *  @struct  ctb_mem_mgr_t
1308  *  @brief   Memory manager structure for CTB level memory allocations of CTB
1309  *           nodes
1310 ******************************************************************************
1311  */
1312 typedef struct
1313 {
1314     /** Base memory ptr */
1315     U08 *pu1_mem;
1316     /** Amount used so far (running value) */
1317     S32 i4_used;
1318     /** Total memory available for this mem mgr */
1319     S32 i4_tot;
1320 
1321     /** Size of CTB node, and alignment requiremnts */
1322     S32 i4_size;
1323     S32 i4_align;
1324 } ctb_mem_mgr_t;
1325 
1326 /**
1327 ******************************************************************************
1328  *  @struct  buf_mgr_t
1329  *  @brief   Memory manager structure for CTB level buffer allocations on the
1330  *           fly, esp useful for pred bufs and working memory
1331 ******************************************************************************
1332  */
1333 typedef struct
1334 {
1335     /** base memory ptr */
1336     U08 *pu1_wkg_mem;
1337     /** total memory available */
1338     S32 i4_total;
1339     /** Memory used so far */
1340     S32 i4_used;
1341 } buf_mgr_t;
1342 
1343 /**
1344 ******************************************************************************
1345  *  @struct  pred_candt_nodes_t
1346  *  @brief   For a given partition and a given CU/blk, this has pointers to
1347  *           all the neighbouring and coloc pred candts. All the pred candts
1348  *           are stored as search_node_t structures itself.
1349 ******************************************************************************
1350  */
1351 typedef struct
1352 {
1353     search_node_t *ps_tl;
1354     search_node_t *ps_t;
1355     search_node_t *ps_tr;
1356     search_node_t *ps_bl;
1357     search_node_t *ps_l;
1358     search_node_t *ps_coloc;
1359     search_node_t *ps_zeromv;
1360     search_node_t **pps_proj_coloc;
1361 
1362     search_node_t *ps_mvp_node;
1363 } pred_candt_nodes_t;
1364 
1365 /**
1366 ******************************************************************************
1367  *  @struct  pred_ctxt_t
1368  *  @brief   For a given CU/blk, has complete prediction information for all
1369  *           types of partitions. Note that the pred candts are only pointed
1370  *           to, not actually stored here. This indirection is to avoid
1371  *           copies after each partition search, this way, the result of
1372  *           a partition is updated and the causally next partition
1373  *           automatically uses this result
1374 ******************************************************************************
1375  */
1376 typedef struct
1377 {
1378     pred_candt_nodes_t as_pred_nodes[TOT_NUM_PARTS];
1379 
1380     /**
1381      *  We use S + lambda * R to evaluate cost. Here S = SAD/SATD and lambda
1382      *  is the scaling of bits to S and R is bits of overhead (MV + mode).
1383      *  Choice of lambda depends on open loop / closed loop, Qp, temporal id
1384      *  and possibly CU depth. It is the caller's responsiblity to pass
1385      *  to this module the appropriate lambda.
1386      */
1387     S32 lambda;
1388 
1389     /** lambda is in Q format, so this is the downshift reqd */
1390     S32 lambda_q_shift;
1391 
1392     /** Prediction direction : PRED_L0 or PRED_L1 */
1393     S32 pred_lx;
1394 
1395     /** MV resolution: FPEL, HPEL or QPEL */
1396     S32 mv_pel;
1397 
1398     /** Points to the ref bits lookup 1 ptr for each PRED_Lx */
1399     U08 **ppu1_ref_bits_tlu;
1400 
1401     /**
1402      *  Points to the ref scale factor, for a given ref id k,
1403      *  to scale as per ref id m, we use entry k+MAX_NUM_REF*m
1404      */
1405     S16 *pi2_ref_scf;
1406 
1407     /**
1408      *  Flag that indicates whether T, TR and TL candidates used
1409      *  are causal or projected
1410      */
1411     U08 proj_used;
1412 
1413 } pred_ctxt_t;
1414 
1415 /**
1416 ******************************************************************************
1417  *  @struct  search_results_t
1418  *  @brief   For a given CU/blk, Stores all the results of ME search. Results
1419  *           are stored per partition, also the best results for CU are stored
1420  *           across partitions.
1421 ******************************************************************************
1422  */
1423 typedef struct
1424 {
1425     /** Size of CU for which this structure used */
1426     CU_SIZE_T e_cu_size;
1427 
1428     /**
1429      * X and y offsets w.r.t. CTB start in encode layers. For non encode
1430      * layers, these may typically be 0
1431      */
1432     U08 u1_x_off;
1433     U08 u1_y_off;
1434 
1435     /** Number of best results for this CU stored */
1436     U08 u1_num_best_results;
1437 
1438     /** Number of results stored per partition. */
1439     U08 u1_num_results_per_part;
1440 
1441     /**
1442      * Number of result planes active. This may be different from total
1443      * number of active references during search. For example, we may
1444      * have 4 active ref, 2 ineach dirn, but active result planes may
1445      * only be 2, one for L0 and 1 for L1
1446      */
1447     U08 u1_num_active_ref;
1448     /**
1449      * mask of active partitions, Totally 17 bits. For a given partition
1450      * id, as per PART_ID_T enum the corresponding bit position is 1/0
1451      * indicating that partition is active or inactive
1452      */
1453     S32 i4_part_mask;
1454 
1455     /** Points to partial results for each partition id
1456      *  Temporary hack for the bug: If +1 is not kept,
1457      *  it doesn't bit match with older version
1458      */
1459     search_node_t *aps_part_results[MAX_NUM_REF][TOT_NUM_PARTS];
1460 
1461     /**
1462      * Ptr to best results for the current CU post bi pred evaluation and
1463      * intra mode insertions
1464      */
1465     inter_cu_results_t *ps_cu_results;
1466 
1467     /** 2 pred ctxts, one for L0 and one for L1 */
1468     pred_ctxt_t as_pred_ctxt[2];
1469 
1470     /**
1471      * Pointer to a table that indicates whether the ref id
1472      * corresponds to past or future dirn. Input is ref id Lc form
1473      */
1474 
1475     U08 *pu1_is_past;
1476 
1477     /**
1478      * Overall best CU cost, while other entries store CU costs
1479      * in single direction, this is best CU cost, where each
1480      * partition cost is evaluated as best of uni/bi
1481      */
1482     S32 best_cu_cost;
1483 
1484     /**
1485      * Split_flag which is used for deciding if 16x16 CU is split or not
1486      */
1487     U08 u1_split_flag;
1488 } search_results_t;
1489 
1490 /**
1491 ******************************************************************************
1492  *  @struct  ctb_list_t
1493  *  @brief   Tree structure containing info for entire CTB. At top level
1494  *           it points to entire CTB results, with children nodes at each lvl
1495  *           being non null if split.
1496 ******************************************************************************
1497  */
1498 typedef struct ctb_list_t
1499 {
1500     /** Indicates whether this level split further */
1501     U08 u1_is_split;
1502 
1503     /** Number of result candts present */
1504     U08 u1_num_candts;
1505 
1506     /**
1507      * Whether this level valid. E.g. if we are at boundary, where only
1508      * left 2 32x32 are within pic boundary, then the parent is force split
1509      * at the children level, TR and BR are invalid.
1510      */
1511     U08 u1_is_valid;
1512 
1513     /**
1514      * IF this level is 16x16 then this mask indicates which 8x8 blks
1515      * are valid
1516      */
1517     U08 u1_8x8_mask;
1518 
1519     /** Search results of this CU */
1520     search_results_t *ps_search_results;
1521 
1522     /** Search results of this CU */
1523     inter_cu_results_t *ps_cu_results;
1524 
1525     /** Pointers to leaf nodes, if CU is split further, else null */
1526     struct ctb_list_t *ps_tl;
1527     struct ctb_list_t *ps_tr;
1528     struct ctb_list_t *ps_bl;
1529     struct ctb_list_t *ps_br;
1530 } ctb_list_t;
1531 
1532 /**
1533 ******************************************************************************
1534  *  @struct  layer_mv_t
1535  *  @brief   mv bank structure for a particular layer
1536 ******************************************************************************
1537  */
1538 typedef struct
1539 {
1540     /** Number of mvs for a given ref/pred dirn */
1541     S32 i4_num_mvs_per_ref;
1542     /** Number of reference for which results stored */
1543     S32 i4_num_ref;
1544     /** Number of mvs stored per blk. Product of above two */
1545     S32 i4_num_mvs_per_blk;
1546     /** Block size of the unit for which MVs stored */
1547     BLK_SIZE_T e_blk_size;
1548     /** Number of blocks present per row */
1549     S32 i4_num_blks_per_row;
1550 
1551     /** Number of mvs stored every row */
1552     S32 i4_num_mvs_per_row;
1553 
1554     /**
1555      * Max number of mvs allowed per row. The main purpose of this variable
1556      * is to resolve or detect discrepanceis between allocation time mem
1557      * and run time mem, when alloc time resolution and run time resolution
1558      * may be different
1559      */
1560     S32 max_num_mvs_per_row;
1561 
1562     /**
1563      * Pointer to mvs of 0, 0 blk, This is different from base since the
1564      * mv bank is padded all sides
1565     */
1566     hme_mv_t *ps_mv;
1567 
1568     /** Pointer to base of mv bank mvs */
1569     hme_mv_t *ps_mv_base;
1570 
1571     /** Pointers to ref idx.One to one correspondence between this and ps_mv*/
1572     S08 *pi1_ref_idx;
1573     /** Base of ref ids just like in case of ps_mv */
1574     S08 *pi1_ref_idx_base;
1575 
1576     /** Part mask for every blk, if stored, 1 per blk */
1577     U08 *pu1_part_mask;
1578 } layer_mv_t;
1579 
1580 /**
1581 ******************************************************************************
1582  *  @struct  mv_hist_t
1583  *  @brief   Histogram structure to calculate global mvs
1584 ******************************************************************************
1585  */
1586 typedef struct
1587 {
1588     S32 i4_num_rows;
1589     S32 i4_num_cols;
1590     S32 i4_shift_x;
1591     S32 i4_shift_y;
1592     S32 i4_lobe1_size;
1593     S32 i4_lobe2_size;
1594     S32 i4_min_x;
1595     S32 i4_min_y;
1596     S32 i4_num_bins;
1597     S32 ai4_bin_count[MAX_NUM_BINS];
1598 } mv_hist_t;
1599 
1600 typedef struct
1601 {
1602     U08 u1_is_past;
1603 } ref_attr_t;
1604 
1605 /**
1606 ******************************************************************************
1607  *  @struct  layer_ctxt_t
1608  *  @brief   Complete information for the layer
1609 ******************************************************************************
1610  */
1611 typedef struct
1612 {
1613     /** Display Width of this layer */
1614     S32 i4_disp_wd;
1615     /** Display height of this layer */
1616     S32 i4_disp_ht;
1617     /** Width of this layer */
1618     S32 i4_wd;
1619     /** height of this layer */
1620     S32 i4_ht;
1621     /** Amount of padding of input in x dirn */
1622     S32 i4_pad_x_inp;
1623     /** Amount of padding of input in y dirn */
1624     S32 i4_pad_y_inp;
1625     /** Padding amount of recon in x dirn */
1626     S32 i4_pad_x_rec;
1627     /** padding amt of recon in y dirn */
1628     S32 i4_pad_y_rec;
1629 
1630     /**
1631      * Offset for recon. Since recon has padding, the 0, 0 start differs
1632      * from base of buffer
1633      */
1634     S32 i4_rec_offset;
1635     /** Offset for input, same explanation as recon */
1636     S32 i4_inp_offset;
1637     /** stride of input buffer */
1638     S32 i4_inp_stride;
1639     /** stride of recon buffer */
1640     S32 i4_rec_stride;
1641     /** Pic order count */
1642     S32 i4_poc;
1643     /** input pointer. */
1644     U08 *pu1_inp;
1645     /** Base of input. Add inp_offset to go to 0, 0 locn */
1646     U08 *pu1_inp_base;
1647 
1648     /** Pointer to 4 hpel recon planes */
1649     U08 *pu1_rec_fxfy;
1650     U08 *pu1_rec_hxfy;
1651     U08 *pu1_rec_fxhy;
1652     U08 *pu1_rec_hxhy;
1653 
1654     /** Global mv, one set per reference searched */
1655     hme_mv_t s_global_mv[MAX_NUM_REF][NUM_GMV_LOBES];
1656 
1657     /** Layer MV bank */
1658     layer_mv_t *ps_layer_mvbank;
1659 
1660     /** Pointer to list of recon buffers for each ref id, one ptr per plane */
1661     U08 **ppu1_list_rec_fxfy;
1662     U08 **ppu1_list_rec_hxfy;
1663     U08 **ppu1_list_rec_fxhy;
1664     U08 **ppu1_list_rec_hxhy;
1665 
1666     void **ppv_dep_mngr_recon;
1667 
1668     /** Pointer to list of input buffers for each ref id, one ptr per plane */
1669     U08 **ppu1_list_inp;
1670 
1671     /** Max MV in x and y direction supported at this layer resolution */
1672     S16 i2_max_mv_x;
1673     S16 i2_max_mv_y;
1674 
1675     /** Converts ref id (as per Lc list) to POC */
1676     S32 ai4_ref_id_to_poc_lc[MAX_NUM_REF];
1677 
1678     S32 ai4_ref_id_to_disp_num[MAX_NUM_REF];
1679 
1680     /** status of the buffer */
1681     S32 i4_is_free;
1682 
1683     /** idr gop number */
1684     S32 i4_idr_gop_num;
1685 
1686     /** is reference picture */
1687     S32 i4_is_reference;
1688 
1689     /** is non reference picture processed by me*/
1690     S32 i4_non_ref_free;
1691 
1692 } layer_ctxt_t;
1693 
/**
 * Pointer to a function that returns an S32 and takes a search node, a
 * prediction context, a partition id and one further S32 argument.
 * NOTE(review): judging by the name this computes an MV cost; the meaning of
 * the trailing S32 is not visible in this header - confirm against the
 * implementations.
 */
typedef S32 (*PF_MV_COST_FXN)(search_node_t *, pred_ctxt_t *, PART_ID_T, S32);
1695 
1696 /**
1697  ******************************************************************************
1698  *  @struct refine_prms_t
1699  *  @brief  All the configurable input parameters for the refinement layer
1700  *
1701  *  @param encode: Whether this layer is encoded or not
1702  *  @param explicit_ref: If enabled, then the number of reference frames to
1703  *                       be searched is a function of coarsest layer num ref
1704                          frames. Else, number of references collapsed to 1/2
1705  *  @param i4_num_fpel_results : Number of full pel results to be allowed
1706  *  @param i4_num_results_per_part: Number of results stored per partition
1707  *  @param e_search_complexity: Decides the number of initial candts, refer
1708  *                               to SEARCH_COMPLEXITY_T
1709  *  @param i4_use_rec_in_fpel: Whether to use input buf or recon buf in fpel
1710  *  @param i4_enable_4x4_part : if encode is 0, we use 8x8 blks, if this param
1711                                 enabled, then we do 4x4 partial sad update
1712  *  @param i4_layer_id        : id of this layer (0 = finest)
1713  *  @param i4_num_32x32_merge_results: number of 32x32 merged results stored
1714  *  @param i4_num_64x64_merge_results: number of 64x64 merged results stored
1715  *  @param i4_use_satd_cu_merge: Use SATD during CU merge
1716  *  @param i4_num_steps_hpel_refine : Number of steps during hpel refinement
1717  *  @param i4_num_steps_qpel_refine : Same as above but for qpel
1718  *  @param i4_use_satd_subpel : Use of SATD or SAD for subpel
1719  ******************************************************************************
1720 */
1721 typedef struct
1722 {
1723     /* This array is used to place upper bounds on the number of search candidates */
1724     /* that can be used per 'search cand location' */
1725     U08 au1_num_fpel_search_cands[NUM_SEARCH_CAND_LOCATIONS];
1726 
1727     U08 u1_max_2nx2n_tu_recur_cands;
1728 
1729     U08 u1_max_num_fpel_refine_centers;
1730 
1731     U08 u1_max_num_subpel_refine_centers;
1732 
1733     S32 i4_encode;
1734     S32 explicit_ref;
1735     S32 i4_num_ref_fpel;
1736     S32 i4_num_fpel_results;
1737 
1738     S32 i4_num_results_per_part;
1739 
1740     S32 i4_num_mvbank_results;
1741     SEARCH_COMPLEXITY_T e_search_complexity;
1742     S32 i4_use_rec_in_fpel;
1743 
1744     S32 i4_enable_4x4_part;
1745     S32 i4_layer_id;
1746 
1747     S32 i4_num_32x32_merge_results;
1748     S32 i4_num_64x64_merge_results;
1749 
1750     S32 i4_use_satd_cu_merge;
1751 
1752     S32 i4_num_steps_post_refine_fpel;
1753     S32 i4_num_steps_fpel_refine;
1754     S32 i4_num_steps_hpel_refine;
1755     S32 i4_num_steps_qpel_refine;
1756     S32 i4_use_satd_subpel;
1757 
1758     double *pd_intra_costs;
1759     S32 bidir_enabled;
1760     S32 lambda_inp;
1761     S32 lambda_recon;
1762     S32 lambda_q_shift;
1763 
1764     S32 limit_active_partitions;
1765 
1766     S32 sdi_threshold;
1767 
1768     U08 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
1769 
1770     U08 u1_max_subpel_candts;
1771 
1772     U08 u1_max_subpel_candts_2Nx2N;
1773     U08 u1_max_subpel_candts_NxN;
1774 
1775     U08 u1_subpel_candt_threshold;
1776 
1777     /* Pointer to the array which has num best results for
1778         fpel refinement */
1779     U08 *pu1_num_best_results;
1780 
1781 } refine_prms_t;
1782 
1783 /**
1784 ******************************************************************************
1785  *  @struct  coarse_prms_t
1786  *  @brief   All the parameters passed to coarse layer search
1787 ******************************************************************************
1788  */
1789 typedef struct
1790 {
1791     /** ID of this layer, typically N-1 where N is tot layers */
1792     S32 i4_layer_id;
1793
1794     /** Initial step size, valid if full search disabled */
1795     S32 i4_start_step;
1796
1797     /** Maximum number of iterations to consider if full search disabled */
1798     S32 i4_max_iters;
1799
1800     /** Number of reference frames to search */
1801     S32 i4_num_ref;
1802
1803     /** Number of best results to maintain at this layer for projection */
1804     S32 num_results;
1805
1806     /**
1807      * Enable or disable full search. If disabled, we search around the
1808      * initial candidates with early exit (see i4_start_step, i4_max_iters)
1809      */
1810     S32 do_full_search;
1811
1812     /** Value of lambda and its Q format (lambda_q_shift) */
1813     S32 lambda;
1814     S32 lambda_q_shift;
1815
1816     /** Step size for full search 2/4 */
         /* NOTE(review): presumably only consulted when do_full_search is set - confirm */
1817     S32 full_search_step;
1818
1819 } coarse_prms_t;
1820 
/**
******************************************************************************
 *  @struct  wgt_pred_ctxt_t
 *  @brief   Weighted prediction context: pointers to the modified (inverse
 *           weighted) input buffers and the per-reference weight, inverse
 *           weight, offset and shift values
******************************************************************************
 */
1821 typedef struct
1822 {
1823     /**
1824      * These pointers point to modified input, one each for one ref idx.
1825      * Instead of weighting the reference, we weight the input with inverse
1826      * wt and offset.
1827      * +1 for storing non weighted input
1828      */
1829     U08 *apu1_wt_inp[MAX_NUM_REF + 1];
1830
1831     /* These are allocated once at the start of encoding */
1832     /* These are necessary only if wt_pred is switched on */
1833     /* Else, only a single buffer is used to store the */
1834     /* unweighted input */
1835     U08 *apu1_wt_inp_buf_array[MAX_NUM_REF + 1];
1836
1837     /** Stores the weights, inverse weights and offsets for each ref */
1838     S32 a_wpred_wt[MAX_NUM_REF];
1839     S32 a_inv_wpred_wt[MAX_NUM_REF];
1840     S32 a_wpred_off[MAX_NUM_REF];
         /* NOTE(review): presumably log2 of the weight denominator - confirm */
1841     S32 wpred_log_wdc;
1842
         /* One shift value per reference. NOTE(review): how it is derived and */
         /* applied is not visible in this header - confirm against users */
1843     S32 ai4_shift_val[MAX_NUM_REF];
1844 } wgt_pred_ctxt_t;
1845 
1846 /**
1847 ******************************************************************************
1848  *  @struct  mv_refine_ctxt_t
1849  *  @brief   This structure contains important parameters used in motion
1850              vector refinement
1851 ******************************************************************************
1852  */
1853 typedef struct
1854 {
1855     /* Added +7 in the array sizes below to make every array dimension
1856     16-byte aligned */
         /* NOTE(review): the meaning of the leading dimension of 2 in the */
         /* arrays below is not visible in this header - confirm against users */
1857     /** Cost of best candidate for each partition*/
1858     MEM_ALIGN16 WORD16 i2_tot_cost[2][TOT_NUM_PARTS + 7];
1859
         /* NOTE(review): presumably the cost after stim injection for each */
         /* partition; same layout as i2_tot_cost - confirm */
1860     MEM_ALIGN16 WORD16 i2_stim_injected_cost[2][TOT_NUM_PARTS + 7];
1861
1862     /** Motion vector cost for the best candidate of each partition*/
1863     MEM_ALIGN16 WORD16 i2_mv_cost[2][TOT_NUM_PARTS + 7];
1864     /** X component of the motion vector of the best candidate of each partition*/
1865     MEM_ALIGN16 WORD16 i2_mv_x[2][TOT_NUM_PARTS + 7];
1866     /** Y component of the motion vector of the best candidate of each partition*/
1867     MEM_ALIGN16 WORD16 i2_mv_y[2][TOT_NUM_PARTS + 7];
1868     /** Reference index of the best candidate of each partition*/
1869     MEM_ALIGN16 WORD16 i2_ref_idx[2][TOT_NUM_PARTS + 7];
1870
1871     /** Partition id for the various partitions*/
         /* NOTE(review): sized TOT_NUM_PARTS + 1, presumably to leave room */
         /* for a terminating entry - confirm */
1872     WORD32 ai4_part_id[TOT_NUM_PARTS + 1];
1873     /** Indicates the total number of valid partitions*/
1874     WORD32 i4_num_valid_parts;
1875
1876     /** Number of candidates to refine through*/
1877     WORD32 i4_num_search_nodes;
1878
1879     /** Stores the satd at the end of fullpel refinement*/
1880     WORD16 ai2_fullpel_satd[2][TOT_NUM_PARTS];
1881 } mv_refine_ctxt_t;
1882
     /* The same context layout is used under two names: one for full-pel */
     /* refinement and one for sub-pel refinement */
1883 typedef mv_refine_ctxt_t fullpel_refine_ctxt_t;
1884 typedef mv_refine_ctxt_t subpel_refine_ctxt_t;
1885 /**
1886 ******************************************************************************
1887  *  @struct  hme_search_prms_t
1888  *  @brief   All prms going to any fpel search
1889 ******************************************************************************
1890  */
1891 typedef struct
1892 {
1893     /** for explicit search, indicates which ref frm to search */
1894     /** for implicit search, indicates the prediction direction for search */
1895     S08 i1_ref_idx;
1896
1897     /** Blk size used for search, and for which the search is done */
1898     BLK_SIZE_T e_blk_size;
1899
1900     /** Number of init candts being searched */
1901     S32 i4_num_init_candts;
1902
         /* NOTE(review): presumably the number of refinement steps run after */
         /* the main search - confirm */
1903     S32 i4_num_steps_post_refine;
1904
1905     /**
1906      * For coarser searches, bigger refinement is done around each candt
1907      * in these cases, this prm has start step
1908      */
1909     S32 i4_start_step;
1910
1911     /** whether SATD to be used for srch */
1912     S32 i4_use_satd;
1913
1914     /** if 1, we use recon frm for search (closed loop ) */
1915     S32 i4_use_rec;
1916
1917     /** bitmask of active partitions */
1918     S32 i4_part_mask;
1919
1920     /** x and y offset of blk w.r.t. pic start */
1921     S32 i4_x_off;
1922     S32 i4_y_off;
1923
1924     /**
1925      * max number of iterations to search if early exit not hit
1926      * relevant only for coarser searches
1927      */
1928     S32 i4_max_iters;
1929
1930     /** pointer to str holding all results for this blk */
1931     search_results_t *ps_search_results;
1932
1933     /** pts to str having all search candt with refinement info */
1934     search_candt_t *ps_search_candts;
1935     /** pts to str having valid mv range info for this blk */
1936     range_prms_t *aps_mv_range[MAX_NUM_REF];
1937     /** cost compute fxnptr */
1938     PF_MV_COST_FXN pf_mv_cost_compute;
1939
1940     /** when this str is set up for full search, indicates step size for same */
1941     S32 full_search_step;
1942
1943     /** stride of inp buffer */
1944     S32 i4_inp_stride;
1945
1946     /** x and y offset of cu w.r.t. ctb start, set to 0 for non enc layer */
1947     S32 i4_cu_x_off;
1948     S32 i4_cu_y_off;
1949
1950     /** base pointer to the de-duplicated search nodes */
1951     search_node_t *ps_search_nodes;
1952
1953     /** number of de-duplicated nodes to be searched */
1954     S32 i4_num_search_nodes;
1955
         /** pointer to the full-pel refinement context (see mv_refine_ctxt_t) */
1956     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
1957
         /* One entry per partition. NOTE(review): presumably the variance of */
         /* the source for each partition - confirm */
1958     U32 au4_src_variance[TOT_NUM_PARTS];
1959
         /* NOTE(review): semantics of the stim multiplier and the noisy-CU */
         /* flag are not visible in this header - confirm against users */
1960     S32 i4_alpha_stim_multiplier;
1961
1962     U08 u1_is_cu_noisy;
1963
         /* NOTE(review): presumably per-partition sum and sum of squares of */
         /* the source samples - confirm */
1964     ULWORD64 *pu8_part_src_sigmaX;
1965     ULWORD64 *pu8_part_src_sigmaXSquared;
1966
1967 } hme_search_prms_t;
1968 
1969 /**
1970 ******************************************************************************
1971  *  @struct  err_prms_t
1972  *  @brief   This is input prms struct for SAD/SATD computation
1973 ******************************************************************************
1974  */
1975 typedef struct
1976 {
1977     /** Ptr to input blk for which err computed */
1978     U08 *pu1_inp;
1979
         /* NOTE(review): presumably the 16-bit-sample counterpart of pu1_inp - confirm */
1980     U16 *pu2_inp;
1981
1982     /** Ptr to ref blk after adjusting for mv and coordinates in pic */
1983     U08 *pu1_ref;
1984
         /* NOTE(review): presumably the 16-bit-sample counterpart of pu1_ref - confirm */
1985     U16 *pu2_ref;
1986
1987     /** Stride of input buffer */
1988     S32 i4_inp_stride;
1989     /** Stride of ref buffer */
1990     S32 i4_ref_stride;
1991     /** Mask of active partitions. */
1992     S32 i4_part_mask;
1993     /** Mask of active grid pts. Refer to GRID_PT_T enum for bit posns */
1994     S32 i4_grid_mask;
1995     /**
1996      * Pointer to SAD Grid where SADs for each partition are stored.
1997      * The layout is as follows: If there are M total partitions
1998      * and N active pts in the grid, then the first N results contain
1999      * first partition, e.g. 2Nx2N. Next N results contain 2nd partition
2000      * sad, e.g. 2NxN_T. Totally we have MxN results.
2001      * Note: The active partition count may be lesser than M, still we
2002      * have results for M partitions
2003      */
2004     S32 *pi4_sad_grid;
2005
2006     /** Pointer to TU_SPLIT grid flags */
2007     S32 *pi4_tu_split_flags;
2008
2009     /** Pointer to the Child's satd cost */
2010     S32 *pi4_child_cost;
2011
2012     /** pointer to the child's TU_split flags */
2013     S32 *pi4_child_tu_split_flags;
2014
2015     /** pointer to the child's TU_early_cbf flags */
2016     S32 *pi4_child_tu_early_cbf;
2017
2018     /** Pointer to TU early CBF flags */
2019     S32 *pi4_tu_early_cbf;
2020
2021     /** pointer to the early cbf thresholds */
2022     S32 *pi4_tu_early_cbf_threshold;
2023
2024     /** store the DC value */
2025     S32 i4_dc_val;
2026
2027     /** Block width and ht of the block being evaluated for SAD */
2028     S32 i4_blk_wd;
2029     S32 i4_blk_ht;
2030
2031     /**
2032      * Array of valid partition ids. E.g. if 2 partitions active,
2033      * then there will be 3 entries, 3rd entry being -1
2034      */
2035     S32 *pi4_valid_part_ids;
2036     /** Step size of the grid */
2037     S32 i4_step;
2038
2039     /* Number of partitions */
2040     S32 i4_num_partitions;
2041
2042     /** Store the tu_split_flag cost */
2043     S32 i4_tu_split_cost;
2044
2045     /** The max_depth for inter tu_tree */
2046     U08 u1_max_tr_depth;
2047
         /* NOTE(review): presumably the maximum transform size allowed; units */
         /* (pixels vs. log2) are not visible in this header - confirm */
2048     U08 u1_max_tr_size;
2049
2050     /** Scratch memory for Doing hadamard */
2051     U08 *pu1_wkg_mem;
2052
         /** Pointer to the list of optimised common utility functions */
2053     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
2054
2055 } err_prms_t;
2056 
/**
******************************************************************************
 *  @struct  grid_ctxt_t
 *  @brief   Describes the grids used in a grid search: how many grids there
 *           are, the reference buffer stride, the packed grid spacing, and
 *           pointers to the grid centers, point masks, MVs and ref indices.
 *           NOTE(review): the pointer members are presumably arrays with one
 *           entry per grid - confirm against users
******************************************************************************
 */
2057 typedef struct grid
2058 {
2059     WORD32 num_grids; /* Number of grid to work with */
2060     WORD32 ref_buf_stride; /* Buffer stride of reference buffer */
2061     WORD32
2062         grd_sz_y_x; /* Packed 16 bits indicating grid spacing in y & x direction <--grid-size-y--><--grid-size-x--> */
2063     UWORD8 **ppu1_ref_ptr; /* Center point for the grid search */
2064     WORD32 *pi4_grd_mask; /* Mask indicating which grid points need to be evaluated */
2065     hme_mv_t *p_mv; /* <--MVy--><--MVx--> */
2066     WORD32 *p_ref_idx; /* Ref idx to which the grid is pointing */
2067 } grid_ctxt_t;
2068 
/**
******************************************************************************
 *  @struct  cand_t
 *  @brief   A single search candidate: its MV, its reference index, the grid
 *           it belongs to and a pointer to the candidate in the reference
******************************************************************************
 */
2069 typedef struct cand
2070 {
2071     hme_mv_t mv; /* MV corresponding to the candidate <--MVy--><--MVx--> */
2072     WORD32 ref_idx; /* Ref idx corresponding to the candidate */
2073     WORD32 grid_ix; /* Grid to which this candidate belongs */
2074     UWORD8 *pu1_ref_ptr; /* Pointer to the candidate */
2075 } cand_t;
2076 
2077 /**
2078 ******************************************************************************
2079  *  @struct  hme_ctb_prms_t
2080  *  @brief   Parameters to create the CTB list, which is a tree structure
2081 ******************************************************************************
2082  */
2083 typedef struct
2084 {
2085     /**
2086      * These parameters cover number of input 16x16, 32x32 and 64x64 results
2087      * and the number of output results that are mix of all above CU sizes.
2088      * i4_num_kxk_unified_out is relevant only if we are sending multiple CU
2089      * sizes for same region for RD Opt.
2090      */
2091     S32 i4_num_16x16_in;
2092     S32 i4_num_32x32_in;
2093     S32 i4_num_32x32_unified_out;
2094     S32 i4_num_64x64_in;
2095     S32 i4_num_64x64_unified_out;
2096
2097     /** Pointers to results at different CU sizes */
2098     search_results_t *ps_search_results_16x16;
2099     search_results_t *ps_search_results_32x32;
2100     search_results_t *ps_search_results_64x64;
2101
         /* NOTE(review): presumably the number of partition types considered - confirm */
2102     S32 i4_num_part_type;
2103
2104     /** Indicates whether we have split at 64x64 level */
2105     S32 i4_cu_64x64_split;
2106     /** Indicates whether each of the 32x32 CU is split (one entry per 32x32) */
2107     S32 ai4_cu_32x32_split[4];
2108
2109     /** X and y offset of the CTB */
2110     S32 i4_ctb_x;
2111     S32 i4_ctb_y;
2112
2113     /**
2114      * Memory manager for the CTB that is responsible for node allocation
2115      * at a CU level
2116      */
2117     ctb_mem_mgr_t *ps_ctb_mem_mgr;
2118
2119     /** Buffer manager that is responsible for memory allocation (pred bufs) */
2120     buf_mgr_t *ps_buf_mgr;
2121 } hme_ctb_prms_t;
2122 
2123 /**
2124 ******************************************************************************
2125  *  @struct  result_upd_prms_t
2126  *  @brief   Parameters used when updating the search results
2127 ******************************************************************************
2128  */
2129 typedef struct
2130 {
2131     /** Cost computation function pointer */
2132     PF_MV_COST_FXN pf_mv_cost_compute;
2133
2134     /** Points to the SAD grid updated during SAD compute fxn */
2135     S32 *pi4_sad_grid;
2136
2137     /** Points to the TU_SPLIT grid updated during the SATD TU REC fxn */
2138     S32 *pi4_tu_split_flags;
2139
2140     /**
2141      * This is the central mv of the grid. For e.g. if we have a 3x3 grid,
2142      * this covers the central pt's mv in the grid.
2143      */
2144     const search_node_t *ps_search_node_base;
2145
2146     /** Search results structure updated by the result update fxn */
2147     search_results_t *ps_search_results;
2148
2149     /** List of active partitions, only these are processed and updated */
2150     S32 *pi4_valid_part_ids;
2151
2152     /** Reference id for this candt and grid */
2153     S08 i1_ref_idx;
2154
2155     /** Mask of active pts in the grid */
2156     S32 i4_grid_mask;
2157
2158     /**
2159      * For early exit reasons we may want to know the id of the least candt
2160      * This will correspond to id of  candt with least cost for 2Nx2N part,
2161      * if multiple partitions enabled, or if 1 part enabled, it will be for
2162      * id of candt of that partition
2163      */
2164     S32 i4_min_id;
2165
2166     /** Step size of the grid */
2167     S32 i4_step;
2168
2169     /** Mask of active partitions */
2170     S32 i4_part_mask;
2171
2172     /** Min cost corresponding to min id */
2173     S32 i4_min_cost;
2174
2175     /** Store the motion vectors in qpel unit (x component, then y component) */
2176     S16 i2_mv_x;
2177
2178     S16 i2_mv_y;
2179
         /* NOTE(review): presumably the prediction list (L0/L1) of the */
         /* candidate - confirm */
2180     U08 u1_pred_lx;
2181
         /** Pointer to the sub-pel refinement context (see mv_refine_ctxt_t) */
2182     subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
2183
2184     /** Current candidate in the subpel refinement process*/
2185     search_node_t *ps_search_node;
2186
2187 } result_upd_prms_t;
2188 
2189 /**
2190 ******************************************************************************
2191  *  @struct  mv_grid_t
2192  *  @brief   Grid of MVs storing results for a CTB and neighbours. For a CTB
2193  *           of size 64x64, we may store upto 16x16 mvs (one for each 4x4)
2194  *           along with 1 neighbour on each side. Valid only for encode layer
2195 ******************************************************************************
2196  */
2197 typedef struct
2198 {
2199     /** All the mvs in the grid (NUM_MVS_IN_CTB_GRID entries) */
2200     search_node_t as_node[NUM_MVS_IN_CTB_GRID];
2201
2202     /** Stride of the grid */
         /* NOTE(review): presumably counted in as_node entries per row - confirm */
2203     S32 i4_stride;
2204
2205     /** Start offset of the 0,0 locn in CTB. */
         /* NOTE(review): presumably an index into as_node - confirm */
2206     S32 i4_start_offset;
2207 } mv_grid_t;
2208 
/**
******************************************************************************
 *  @struct  centroid_t
 *  @brief   Position of a centroid, stored as (x, y) in Q8 fixed point
******************************************************************************
 */
2209 typedef struct
2210 {
2211     /* centroid's (x, y) co-ordinates in Q8 format */
2212     WORD32 i4_pos_x_q8;
2213
2214     WORD32 i4_pos_y_q8;
2215 } centroid_t;
2216 
/**
******************************************************************************
 *  @struct  cluster_data_t
 *  @brief   Data kept for one cluster of motion vectors: the mvs in the
 *           cluster, their centroid, and area bookkeeping
******************************************************************************
 */
2217 typedef struct
2218 {
         /* NOTE(review): min/max x and y are presumably the extents of the */
         /* mvs in this cluster - confirm */
2219     S16 min_x;
2220
2221     S16 min_y;
2222
2223     S16 max_x;
2224
2225     S16 max_y;
2226
2227     /* The cumulative sum of partition sizes of the mvs */
2228     /* in this cluster */
2229     S16 area_in_pixels;
2230
         /* NOTE(review): presumably the parts of area_in_pixels contributed */
         /* by uni-pred and bi-pred mvs respectively - confirm */
2231     S16 uni_mv_pixel_area;
2232
2233     S16 bi_mv_pixel_area;
2234
         /* Storage for the mvs of this cluster (capacity 128 entries) */
2235     mv_data_t as_mv[128];
2236
         /* NOTE(review): presumably the number of valid entries in as_mv - confirm */
2237     U08 num_mvs;
2238
2239     /* Weighted average of all mvs in the cluster */
2240     s_centroid_placeholder_do_not_use
2241
2242     S08 ref_id;
2243
2244     S32 max_dist_from_centroid;
2245
2246     U08 is_valid_cluster;
2247
2248 } cluster_data_t;
2249 
/**
******************************************************************************
 *  @struct  cluster_16x16_blk_t
 *  @brief   Cluster information for a 16x16 blk; holds storage for up to
 *           MAX_NUM_CLUSTERS_16x16 clusters
******************************************************************************
 */
2250 typedef struct
2251 {
2252     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_16x16];
2253
         /* NOTE(review): presumably the number of entries in use in */
         /* as_cluster_data - confirm */
2254     U08 num_clusters;
2255
         /* One count per reference (MAX_NUM_REF entries). NOTE(review): */
         /* presumably the number of clusters per ref id - confirm */
2256     U08 au1_num_clusters[MAX_NUM_REF];
2257
         /* NOTE(review): presumably the pixel area attributed to intra - confirm */
2258     S16 intra_mv_area;
2259
2260     S32 best_inter_cost;
2261
2262 } cluster_16x16_blk_t;
2263 
/**
******************************************************************************
 *  @struct  cluster_32x32_blk_t
 *  @brief   Cluster information for a 32x32 blk; holds storage for up to
 *           MAX_NUM_CLUSTERS_32x32 clusters
******************************************************************************
 */
2264 typedef struct
2265 {
2266     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_32x32];
2267
         /* NOTE(review): presumably the number of entries in use in */
         /* as_cluster_data - confirm */
2268     U08 num_clusters;
2269
         /* One count per reference (MAX_NUM_REF entries). NOTE(review): */
         /* presumably the number of clusters per ref id - confirm */
2270     U08 au1_num_clusters[MAX_NUM_REF];
2271
         /* NOTE(review): presumably the pixel area attributed to intra - confirm */
2272     S16 intra_mv_area;
2273
         /* NOTE(review): presumably ref ids of the best and the alternate */
         /* uni-directional references - confirm */
2274     S08 best_uni_ref;
2275
2276     S08 best_alt_ref;
2277
2278     S32 best_inter_cost;
2279
2280     U08 num_refs;
2281
         /* NOTE(review): "sdi" is not defined in this part of the header; */
         /* see users for how weak density is decided */
2282     U08 num_clusters_with_weak_sdi_density;
2283
2284 } cluster_32x32_blk_t;
2285 
/**
******************************************************************************
 *  @struct  cluster_64x64_blk_t
 *  @brief   Cluster information for a 64x64 blk; holds storage for up to
 *           MAX_NUM_CLUSTERS_64x64 clusters
******************************************************************************
 */
2286 typedef struct
2287 {
2288     cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_64x64];
2289
         /* NOTE(review): presumably the number of entries in use in */
         /* as_cluster_data - confirm */
2290     U08 num_clusters;
2291
         /* One count per reference (MAX_NUM_REF entries). NOTE(review): */
         /* presumably the number of clusters per ref id - confirm */
2292     U08 au1_num_clusters[MAX_NUM_REF];
2293
         /* NOTE(review): presumably the pixel area attributed to intra - confirm */
2294     S16 intra_mv_area;
2295
         /* NOTE(review): presumably ref ids of the best and the alternate */
         /* uni-directional references - confirm */
2296     S08 best_uni_ref;
2297
2298     S08 best_alt_ref;
2299
2300     S32 best_inter_cost;
2301
2302     U08 num_refs;
2303
2304 } cluster_64x64_blk_t;
2305 
/**
******************************************************************************
 *  @struct  ctb_cluster_info_t
 *  @brief   Groups pointers to the 16x16, 32x32 and 64x64 cluster data with
 *           the CU tree root, IPE analysis data, masks and thresholds.
 *           NOTE(review): presumably one instance per CTB - confirm
******************************************************************************
 */
2306 typedef struct
2307 {
         /* Pointers to cluster data at each blk size. NOTE(review): whether */
         /* these point to single structs or arrays is not visible here */
2308     cluster_16x16_blk_t *ps_16x16_blk;
2309
2310     cluster_32x32_blk_t *ps_32x32_blk;
2311
2312     cluster_64x64_blk_t *ps_64x64_blk;
2313
         /* Root of the CU tree and the IPE (L0) analysis data passed to ME */
2314     cur_ctb_cu_tree_t *ps_cu_tree_root;
2315     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
2316     S32 nodes_created_in_cu_tree;
2317
         /* NOTE(review): mask layouts (which bit maps to which blk) are not */
         /* visible in this header - confirm against users */
2318     S32 *pi4_blk_8x8_mask;
2319
2320     S32 blk_32x32_mask;
2321
2322     S32 sdi_threshold;
2323
2324     S32 i4_frame_qstep;
2325
2326     S32 i4_frame_qstep_multiplier;
2327
         /* 16 entries each. NOTE(review): presumably one entry per 16x16 blk */
         /* of a 64x64 CTB - confirm */
2328     U08 au1_is_16x16_blk_split[16];
2329
2330     S32 ai4_part_mask[16];
2331
2332 } ctb_cluster_info_t;
2333 
2334 /**
2335 ******************************************************************************
2336  *  @struct  hme_merge_prms_t
2337  *  @brief   All parameters related to the merge process
2338 ******************************************************************************
2339  */
2340 typedef struct
2341 {
2342     /**
2343      * MV Range prms for the merged CU, this may have to be conservative
2344      * in comparison to individual CUs
2345      */
2346     range_prms_t *aps_mv_range[MAX_NUM_REF];
2347
2348     /** Pointers to search results of 4 children CUs to be merged */
2349     search_results_t *ps_results_tl;
2350     search_results_t *ps_results_tr;
2351     search_results_t *ps_results_bl;
2352     search_results_t *ps_results_br;
2353
         /* NOTE(review): presumably results of the blks one level below the */
         /* four children above - confirm */
2354     search_results_t *ps_results_grandchild;
2355
2356     /** Pointer to search results of the parent CU updated during merge */
2357     search_results_t *ps_results_merge;
2358
         /* NOTE(review): presumably inter CU results at 8x8 size - confirm */
2359     inter_cu_results_t *ps_8x8_cu_results;
2360
2361     /** Layer related context */
2362     layer_ctxt_t *ps_layer_ctxt;
2363
2364     inter_ctb_prms_t *ps_inter_ctb_prms;
2365
2366     /**
2367      * Points to an array of pointers. This array in turn points to
2368      * the active mv grid in each direction (L0/L1)
2369      */
2370     mv_grid_t **pps_mv_grid;
2371
         /** Cluster information (see ctb_cluster_info_t) */
2372     ctb_cluster_info_t *ps_cluster_info;
2373
         /* NOTE(review): presumably lists of past and future reference ids - confirm */
2374     S08 *pi1_past_list;
2375
2376     S08 *pi1_future_list;
2377
2378     /** MV cost compute function */
2379     PF_MV_COST_FXN pf_mv_cost_compute;
2380
2381     /** If segmentation info available for the parent block */
2382     S32 i4_seg_info_avail;
2383
2384     /** Partition mask (if segmentation info available) */
2385     S32 i4_part_mask;
2386
2387     /** Number of input results available for the merge proc from children*/
2388     S32 i4_num_inp_results;
2389
2390     /** Whether SATD to be used for fpel searches */
2391     S32 i4_use_satd;
2392
2393     /**
2394      * Number of result planes valid for this merge process. For example,
2395      * for fpel search in encode layer, we may have only L0 and L1
2396      */
2397     S32 i4_num_ref;
2398
2399     /** Whether to use input or recon frm for search */
2400     S32 i4_use_rec;
2401
2402     /** optimized mv grid flag : indicates if same mvgrid is used for both fpel and qpel
2403      *  This helps in copying fpel and qpel mv grid in pred context mv grid
2404      */
2405     S32 i4_mv_grid_opt;
2406
2407     /** ctb size, typically 32 or 64 */
         /* NOTE(review): the name suggests a log2 value while the comment */
         /* above gives sizes in pixels - confirm which is stored */
2408     S32 log_ctb_size;
2409
         /* NOTE(review): presumably x and y offset of the ctb w.r.t. start of */
         /* pic, as in hme_subpel_prms_t - confirm */
2410     S32 i4_ctb_x_off;
2411
2412     S32 i4_ctb_y_off;
2413
2414     ME_QUALITY_PRESETS_T e_quality_preset;
2415
2416     S32 i4_num_pred_dir_actual;
2417
         /* Two entries. NOTE(review): presumably one per prediction */
         /* direction - confirm */
2418     U08 au1_pred_dir_searched[2];
2419
         /* NOTE(review): semantics of the stim multiplier and the noisy-CU */
         /* flag are not visible in this header - confirm against users */
2420     S32 i4_alpha_stim_multiplier;
2421
2422     U08 u1_is_cu_noisy;
2423
2424 } hme_merge_prms_t;
2425 
2426 /**
2427 ******************************************************************************
2428  *  @struct  mvbank_update_prms_t
2429  *  @brief   Useful prms for updating the mv bank
2430 ******************************************************************************
2431  */
2432 typedef struct
2433 {
2434     /** Number of references for which update to be done */
2435     S32 i4_num_ref;
2436
2437     /**
2438      * Search blk size that was used, if this is different from the blk
2439      * size used in mv bank, then some replications or reductions may
2440      * have to be done. E.g. if search blk size is 8x8 and result blk
2441      * size is 4x4, then we have to update part NxN results to be
2442      * used for update along with replication of 2Nx2N result in each
2443      * of the 4 4x4 blk.
2444      */
2445     BLK_SIZE_T e_search_blk_size;
2446
2447     /**
2448      * Redundant prm as it reflects differences between search blk size
2449      * and mv blk size if any
2450      */
2451     S32 i4_shift;
2452
         /* NOTE(review): presumably the number of active references in */
         /* lists L0 and L1 respectively - confirm */
2453     S32 i4_num_active_ref_l0;
2454
2455     S32 i4_num_active_ref_l1;
2456
         /* NOTE(review): presumably the number of results written to the mv */
         /* bank per blk - confirm */
2457     S32 i4_num_results_to_store;
2458 } mvbank_update_prms_t;
2459 
2460 /**
2461 ******************************************************************************
2462  *  @struct  hme_subpel_prms_t
2463  *  @brief   input and control prms for subpel refinement
2464 ******************************************************************************
2465  */
2466 typedef struct
2467 {
2468     /** Relevant only for the case where we mix up results of diff cu sizes */
2469     S32 i4_num_16x16_candts;
2470     S32 i4_num_32x32_candts;
2471     S32 i4_num_64x64_candts;
2472
2473     /** X and y offset of ctb w.r.t. start of pic */
2474     S32 i4_ctb_x_off;
2475     S32 i4_ctb_y_off;
2476
2477     /** Max Number of diamond steps for hpel and qpel refinement */
2478     S32 i4_num_steps_hpel_refine;
2479     S32 i4_num_steps_qpel_refine;
2480
2481     /** Whether SATD to be used or SAD to be used */
2482     S32 i4_use_satd;
2483
2484     /**
2485      * Input ptr. This is updated inside the subpel refinement by picking
2486      * up correct address
2487      */
2488     void *pv_inp;
2489
2490     /**
2491      * Pred buffer ptr, updated inside subpel refinement process. This
2492      * location passed to the leaf fxn for copying the winner pred buf
2493      */
2494     U08 *pu1_pred;
2495
2496     /** Interpolation fxn sent by top layer, should exact qpel be desired */
2497     PF_INTERP_FXN_T pf_qpel_interp;
2498
2499     /** Working mem passed to leaf fxns */
2500     U08 *pu1_wkg_mem;
2501
2502     /** prediction buffer stride for leaf fxns to copy the pred winner buf */
2503     S32 i4_pred_stride;
2504
2505     /** Type of input ; sizeof(UWORD8) => unidir refinement, else BIDIR */
2506     S32 i4_inp_type;
2507
2508     /** Stride of input buf, updated inside subpel fxn */
2509     S32 i4_inp_stride;
2510
2511     /**
2512      * Pointer to the backward input ptr. This is also updated inside
2513      * the subpel fxn. Needed for BIDIR refinement where modified input
2514      * is 2I - P0
2515      */
2516     S16 *pi2_inp_bck;
2517
2518     /** Indicates if CU merge uses SATD / SAD */
2519     S32 i4_use_satd_cu_merge;
2520
2521     /** valid MV range in hpel and qpel units */
2522     range_prms_t *aps_mv_range_hpel[MAX_NUM_REF];
2523     range_prms_t *aps_mv_range_qpel[MAX_NUM_REF];
2524     /** Relevant only for mixed CU cases */
2525     search_results_t *ps_search_results_16x16;
2526     search_results_t *ps_search_results_32x32;
2527     search_results_t *ps_search_results_64x64;
2528
2529     /** Cost computation fxn ptr */
2530     PF_MV_COST_FXN pf_mv_cost_compute;
2531
2532     /** Whether BI mode is allowed for this pic (not allowed in P) */
2533     S32 bidir_enabled;
2534
2535     /**
2536      * Total number of references of current picture which is encoded
2537      */
2538     U08 u1_num_ref;
2539
2540     /**
2541      * Number of candidates used for refinement
2542      * If given 1 candidate, then 2Nx2N is chosen as the best candidate
2543      */
2544     U08 u1_max_subpel_candts;
2545
         /* NOTE(review): how this threshold is applied is not visible in */
         /* this header - confirm against users */
2546     U08 u1_subpel_candt_threshold;
2547
2548     ME_QUALITY_PRESETS_T e_me_quality_presets;
2549
         /* NOTE(review): presumably the max number of subpel candidates for */
         /* the 2Nx2N and NxN partition types respectively - confirm */
2550     U08 u1_max_subpel_candts_2Nx2N;
2551     U08 u1_max_subpel_candts_NxN;
2552
2553     U08 u1_max_num_subpel_refine_centers;
2554
         /** Pointer to the sub-pel refinement context (see mv_refine_ctxt_t) */
2555     subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
2556
         /* NOTE(review): presumably the number of active references in */
         /* lists L0 and L1 respectively - confirm */
2557     S32 i4_num_act_ref_l0;
2558
2559     S32 i4_num_act_ref_l1;
2560
2561     U08 u1_is_cu_noisy;
2562 } hme_subpel_prms_t;
2563 
2564 /**
2565 ******************************************************************************
2566  *  @struct  layers_descr_t
2567  *  @brief   One such str exists for each ref and curr input in the me ctxt
2568  *           Has ctxt handles for all layers of a given POC
2569 ******************************************************************************
2570  */
2571 typedef struct
2572 {
2573     /** Handles for all layers (MAX_NUM_LAYERS entries). Entry 0 is finest layer */
2574     layer_ctxt_t *aps_layers[MAX_NUM_LAYERS];
2575 } layers_descr_t;
2576 
2577 /**
2578 ******************************************************************************
2579  *  @struct  blk_ctb_attrs_t
2580  *  @brief   The CTB is split into 16x16 blks. For each such blk, this str
2581  *           stores attributes of this blk w.r.t. ctb
2582 ******************************************************************************
2583  */
2584 typedef struct
2585 {
2586     /**
2587      * ID of the blk in the full ctb. Assuming the full ctb were coded,
2588      * this indicates what is the blk num of this blk (in encode order)
2589      * within the full ctb
2590      */
2591     U08 u1_blk_id_in_full_ctb;
2592
2593     /** x and y coordinates of this blk w.r.t. ctb base */
         /* NOTE(review): units (pixels vs. 16x16 blk units) are not visible */
         /* in this header - confirm against users */
2594     U08 u1_blk_x;
2595     U08 u1_blk_y;
2596     /**
2597      * Mask of 8x8 blks that are active. Bits 0-3 for blks 0-3 in raster order
2598      * within a 16x16 blk. This will be 0xf in interiors and < 0xf at rt/bot
2599      * boundaries or at bot rt corners, where we may not have full 16x16 blk
2600      */
2601     U08 u1_blk_8x8_mask;
2602 } blk_ctb_attrs_t;
2603 
2604 /**
2605 ******************************************************************************
2606  *  @struct  ctb_boundary_attrs_t
2607  *  @brief   Depending on the location of ctb (rt boundary, bot boundary,
2608  *           bot rt corner, elsewhere) this picks out the appropriate
2609  *           attributes of the ctb
2610 ******************************************************************************
2611  */
2612 typedef struct
2613 {
2614     /**
2615      * 4 bit variable, one for each of the 4 possible 32x32s in a full ctb
2616      * If any 32x32 is partially present / not present at boundaries, that
2617      * bit posn will be 0
2618      */
2619     U08 u1_merge_to_32x32_flag;
2620
2621     /**
2622      * 1 bit flag indicating whether it is a complete ctb or not, and
2623      * consequently whether it can be merged to a full 64x64
2624      */
2625     U08 u1_merge_to_64x64_flag;
2626
2627     /** Number of valid 16x16 blks (includes those partially/fully present) */
2628     U08 u1_num_blks_in_ctb;
2629
2630     /**
          * 16 bit variable indicating whether the corresponding 16x16 is valid.
          * Only 16 flag bits are described, although the storage type is S32
          */
2631     S32 cu_16x16_valid_flag;
2632
2633     /**
2634      * For possible 16 16x16 blks in a CTB, we have one attribute str for
2635      * every valid blk. Tightly packed structure. For example,
2636      *  0  1  4  5
2637      *  2  3  6  7
2638      *  8  9 12 13
2639      * 10 11 14 15
2640      * Assuming the ctb width is only 48, blks 5,7,13,15 are invalid
2641      * Then We store attributes in the order: 0,1,2,3,4,6,8,9,10,11,12,14
2642      */
2643     blk_ctb_attrs_t as_blk_attrs[16];
2644 } ctb_boundary_attrs_t;
2645 
/**
******************************************************************************
 *  @struct  outlier_data_t
 *  @brief   Record tying an sdi value to a reference index and a cluster id.
 *           NOTE(review): "sdi" is not defined in this part of the header and
 *           the use of this record for outlier handling is inferred from the
 *           type name only - confirm against users
******************************************************************************
 */
2646 typedef struct
2647 {
2648     S32 sdi;
2649
2650     S32 ref_idx;
2651
2652     S32 cluster_id;
2653 } outlier_data_t;
2654 
2655 /**
2656 ******************************************************************************
2657  *  @struct  coarse_dyn_range_prms_t
2658  *  @brief   The parameters for Dyn. Search Range in coarse ME
2659 ******************************************************************************
2660  */
2661 
2662 typedef struct
2663 {
2664     /* TODO: size can be reduced, as the entries are not used for L0 */
2665
2666     /** Dynamic Search Range parameters per layer & ref_pic */
         /* Indexed as [layer][ref pic] */
2667     dyn_range_prms_t as_dyn_range_prms[MAX_NUM_LAYERS][MAX_NUM_REF];
2668
2669     /** Min y value Normalized per POC distance (one entry per layer) */
2670     WORD16 i2_dyn_min_y_per_poc[MAX_NUM_LAYERS];
2671     /** Max y value Normalized per POC distance (one entry per layer) */
2672     WORD16 i2_dyn_max_y_per_poc[MAX_NUM_LAYERS];
2673
2674 } coarse_dyn_range_prms_t;
2675 
2676 /**
2677 ******************************************************************************
2678  *  @struct  coarse_me_ctxt_t
2679  *  @brief   Handle for Coarse ME
2680 ******************************************************************************
2681  */
2682 typedef struct
2683 {
2684     /** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
2685     search_node_t s_init_search_node[MAX_INIT_CANDTS * 2];
2686 
2687     /** For non enc layer, we search 8x8 blks and store results here */
2688     search_results_t s_search_results_8x8;
2689     /**
2690      * Below arays store input planes for each ref pic.
2691      * These are duplications, and are present within layer ctxts, but
2692      * kept here together for faster indexing during search
2693      */
2694     U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
2695 
2696     /** Ptr to all layer context placeholder for curr pic encoded */
2697     layers_descr_t *ps_curr_descr;
2698 
2699     /** Ptr to all layer ctxt place holder for all pics */
2700     layers_descr_t as_ref_descr[MAX_NUM_REF + 1 + NUM_BUFS_DECOMP_HME];
2701 
2702     /**
2703      * ME uses ref id lc to search multi ref. This TLU gets POC of
2704      * the pic w.r.t. a given ref id
2705      */
2706     S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
2707 
2708     /** use this array to get disp num from ref_idx. Used for L1 traqo **/
2709     S32 ai4_ref_idx_to_disp_num[MAX_NUM_REF];
2710 
2711     /** POC of pic encoded just before current */
2712     S32 i4_prev_poc;
2713 
2714     /** POC of curret pic being encoded */
2715     S32 i4_curr_poc;
2716 
2717     /** Number of HME layers encode + non encode */
2718     S32 num_layers;
2719 
2720     /** Alloc time parameter, max ref frms used for this session */
2721     S32 max_num_ref;
2722 
2723     /**
2724      * Number of layers that use explicit search. Explicit search means
2725      * that each ref id is searched separately
2726      */
2727     S32 num_layers_explicit_search;
2728 
2729     /**
2730      * Maximum number of results maintained at any refinement layer
2731      * search. Important from mem alloc perspective
2732      */
2733     S32 max_num_results;
2734 
2735     /** Same as above but for coarse layer */
2736     S32 max_num_results_coarse;
2737 
2738     /** Array of flags, one per layer indicating hwether layer is encoded */
2739     U08 u1_encode[MAX_NUM_LAYERS];
2740 
2741     /** Init prms send by encoder during create time */
2742     hme_init_prms_t s_init_prms;
2743 
2744     /**
2745      * Array look up created each frm, maintaining the corresponding
2746      * layer descr look up for each ref id
2747      */
2748     S32 a_ref_to_descr_id[MAX_NUM_REF];
2749 
2750     /**
2751      * Array lookup created each frame that maps a given ref id
2752      * pertaining to unified list to a L0/L1 list. Encoder searches in terms
2753      * of LC list or in other words does not differentiate between L0
2754      * and L1 frames for most of search. Finally to report results to
2755      * encoder, the ref id has to be remapped to suitable list
2756      */
2757     S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
2758     S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
2759 
2760     /** Width and ht of each layer */
2761     S32 a_wd[MAX_NUM_LAYERS];
2762     S32 a_ht[MAX_NUM_LAYERS];
2763 
2764     /** Histogram, one for each ref, allocated during craete time */
2765     mv_hist_t *aps_mv_hist[MAX_NUM_REF];
2766 
2767     /** Whether a given ref id in Lc list is past frm or future frm */
2768     U08 au1_is_past[MAX_NUM_REF];
2769 
2770     /** These are L0 and L1 lists, storing ref id Lc in them */
2771     S08 ai1_past_list[MAX_NUM_REF];
2772     S08 ai1_future_list[MAX_NUM_REF];
2773 
2774     /** Number of past and future ref pics sent this frm */
2775     S32 num_ref_past;
2776     S32 num_ref_future;
2777 
2778     void *pv_ext_frm_prms;
2779 
2780     hme_frm_prms_t *ps_hme_frm_prms;
2781 
2782     hme_ref_map_t *ps_hme_ref_map;
2783     /**
2784      *  Scale factor of any given ref lc to another ref in Q8
2785      *  First MAX_NUM_REF entries are to scale an mv of ref id k
2786      *  w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
2787      *  Next MAX_NUM_REF entreis are to scale mv of ref id 1 w.r.t. 0
2788      *  And so on
2789      */
2790     S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
2791 
2792     /** bits for a given ref id, in either list L0/L1 */
2793     U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
2794 
2795     /** Points to above: 1 ptr for each list */
2796     U08 *apu1_ref_bits_tlu_lc[2];
2797 
2798     /** number of b fraems between P, depends on number of hierarchy layers */
2799     S32 num_b_frms;
2800 
2801     /** Frame level qp passed every frame by ME's caller */
2802     S32 frm_qstep;
2803 
2804     /** Backup of frame parameters */
2805     hme_frm_prms_t s_frm_prms;
2806 
2807     /** Weighted prediction parameters for all references are stored
2808      *  Scratch buffers for populated widgted inputs are also stored in this
2809      */
2810     wgt_pred_ctxt_t s_wt_pred;
2811 
2812     /** Weighted pred enable flag */
2813     S32 i4_wt_pred_enable_flag;
2814 
2815     /* Pointer to hold 5 rows of best search node information */
2816     search_node_t *aps_best_search_nodes_4x8_n_rows[MAX_NUM_REF];
2817 
2818     search_node_t *aps_best_search_nodes_8x4_n_rows[MAX_NUM_REF];
2819 
2820     /* Pointer to hold 5 rows of best search node information */
2821     S16 *api2_sads_4x4_n_rows[MAX_NUM_REF];
2822 
2823     /*  Number of row buffers to store SADs and best search nodes */
2824     S32 i4_num_row_bufs;
2825 
2826     /* (HEVCE_MAX_HEIGHT>>1) assuming layer 1 is coarse layer and >>2 assuming block size is 4x4*/
2827     S32 ai4_row_index[(HEVCE_MAX_HEIGHT >> 1) >> 2];
2828 
2829     /* store L1 cost required for rate control for enc decision*/
2830     S32 i4_L1_hme_best_cost;
2831 
2832     /* store L1 cost required for modulation index calc*/
2833     //S32 i4_L1_hme_best_cost_for_ref;
2834 
2835     /* store L1 satd */
2836     S32 i4_L1_hme_sad;
2837     /* EIID: layer1 buffer to store the early inter intra costs and decisions */
2838     /* pic_level pointer stored here */
2839     ihevce_ed_blk_t *ps_ed_blk;
2840     /* EIID: layer1 buffer to store the sad/cost information for rate control
2841     or cu level qp modulation*/
2842     ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
2843     /** Dynamical Search Range parameters */
2844     coarse_dyn_range_prms_t s_coarse_dyn_range_prms;
2845 
2846     /** Dependency manager for Row level sync in HME pass */
2847     void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
2848 
2849     /* pointer buffers for memory mapping */
2850     UWORD8 *pu1_me_reverse_map_info;
2851 
2852     /*blk count which has higher SAD*/
2853     S32 i4_num_blks_high_sad;
2854 
2855     /*num of 8x8 blocks in nearest poc*/
2856     S32 i4_num_blks;
2857 
2858     /* thread id of the current context */
2859     WORD32 thrd_id;
2860 
2861     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
2862     void *pv_me_optimised_function_list;
2863 
2864     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
2865 
2866 } coarse_me_ctxt_t;
2867 
2868 /**
2869 ******************************************************************************
2870  *  @struct  l0_dyn_range_prms_t
2871  *  @brief   The parameters for Dyn. Search Range in L0 ME
2872 ******************************************************************************
2873  */
2874 typedef struct /* Dynamic search range data: one dyn_range_prms_t per reference picture plus summary values */
2875 {
2876     /** Dynamic search range parameters, one entry per ref_pic (MAX_NUM_REF entries) */
2877     dyn_range_prms_t as_dyn_range_prms[MAX_NUM_REF];
2878
2879     /** Min y value, normalized per POC distance */
2880     WORD16 i2_dyn_min_y_per_poc;
2881     /** Max y value, normalized per POC distance */
2882     WORD16 i2_dyn_max_y_per_poc;
2883
2884     /* The number of ref. pics actually used in L0. Used to communicate */
2885     /* this count to ihevce_l0_me_frame_end and frame process           */
2886     WORD32 i4_num_act_ref_in_l0;
2887
2888     /* Display number. NOTE(review): presumably of the picture these params belong to - confirm */
2889     WORD32 i4_display_num;
2890
2891 } l0_dyn_range_prms_t;
2892 
2893 /**
2894 ******************************************************************************
2895  *  @brief inter prediction (MC) context for me loop
2896 ******************************************************************************
2897  */
2898 /* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
2899 typedef struct /* Inter prediction (MC) context for the ME loop; member layout must stay identical to inter_pred_ctxt_t */
2900 {
2901     /** pointer to the reference lists: points to an array of (HEVCE_MAX_REF_PICS * 2) recon_pic_buf_t pointers */
2902     recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
2903
2904     /** scratch buffer for horizontal interpolation destination, MAX_CTB_SIZE * (MAX_CTB_SIZE + 8) entries */
2905     WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
2906
2907     /** scratch 16 bit buffer for interpolation in l0 direction, MAX_CTB_SIZE * MAX_CTB_SIZE entries */
2908     WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
2909
2910     /** scratch 16 bit buffer for interpolation in l1 direction, MAX_CTB_SIZE * MAX_CTB_SIZE entries */
2911     WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
2912
2913     /** Pointer to struct containing function pointers to
2914         functions in the 'common' library */
2915     func_selector_t *ps_func_selector;
2916
2917     /** common denominator (log2) used for luma weights */
2918     WORD32 i4_log2_luma_wght_denom;
2919
2920     /** common denominator (log2) used for chroma weights */
2921     WORD32 i4_log2_chroma_wght_denom;
2922
2923     /**  offset w.r.t frame start in horz direction (pels) */
2924     WORD32 i4_ctb_frm_pos_x;
2925
2926     /**  offset w.r.t frame start in vert direction (pels) */
2927     WORD32 i4_ctb_frm_pos_y;
2928
2929     /* Bit depth of input */
2930     WORD32 i4_bit_depth;
2931
2932     /* Chroma array type: 0 - 4:0:0; 1 - 4:2:0; 2 - 4:2:2; 3 - 4:4:4 */
2933     UWORD8 u1_chroma_array_type;
2934
2935     /** weighted_pred_flag      */
2936     WORD8 i1_weighted_pred_flag;
2937
2938     /** weighted_bipred_flag    */
2939     WORD8 i1_weighted_bipred_flag;
2940
2941     /** Structure to describe extra CTBs around frame due to search
2942         range associated with distributed-mode. Entries are top, left,
2943         right and bottom. NOTE(review): member name says pels, text says CTBs - confirm units */
2944     WORD32 ai4_tile_xtra_pel[4];
2945
2946 } inter_pred_me_ctxt_t;
2947 
2948 typedef void FT_CALC_SATD_AND_RESULT(err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms); /* Function type (not a pointer type); me_func_selector_t declares pointers to it */
2949 
2950 typedef struct /* Table of ME function pointers: six FT_CALC_SATD_AND_RESULT variants followed by three FT_HAD_* pointers */
2951 { /* NOTE(review): name suffixes presumably encode results kept (1 or 2) and partition count (== 1, < 9, < 17) - confirm with the implementations */
2952     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1;
2953     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9;
2954     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17;
2955     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1;
2956     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9;
2957     FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17;
2958     FT_HAD_8X8_USING_4_4X4_R *pf_had_8x8_using_4_4x4_r;
2959     FT_HAD_16X16_R *pf_had_16x16_r;
2960     FT_HAD_32X32_USING_16X16 *pf_compute_32x32HAD_using_16x16;
2961 } me_func_selector_t;
2962 
2963 /**
2964 ******************************************************************************
2965  *  @struct  me_frm_ctxt_t
2966  *  @brief   Frame level context for ME
2967 ******************************************************************************
2968  */
2969 typedef struct /* Frame level ME context; me_ctxt_t holds MAX_NUM_ME_PARALLEL pointers to this type */
2970 {
2971     /** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
2972     search_node_t s_init_search_node[MAX_INIT_CANDTS];
2973
2974     /** Motion Vectors array, MAX_INIT_CANDTS entries */
2975     mv_t as_search_cand_mv[MAX_INIT_CANDTS];
2976
2977     /** Results of 16 16x16 blks within a CTB used in enc layer */
2978     search_results_t as_search_results_16x16[16];
2979
2980     /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
2981     search_results_t as_search_results_32x32[4];
2982
2983     /** Same as above but for 64x64 blk */
2984     search_results_t s_search_results_64x64;
2985
2986     /**
2987      * Below arrays store input, 4 recon planes for each ref pic.
2988      * These are duplications, and are present within layer ctxts, but
2989      * kept here together for faster indexing during search
2990      */
2991
2992     U08 *apu1_list_rec_fxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
2993     U08 *apu1_list_rec_hxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
2994     U08 *apu1_list_rec_fxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
2995     U08 *apu1_list_rec_hxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
2996     U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
2997     /* Opaque dependency manager handles, indexed [layer][ref] like the plane pointers above */
2998     void *apv_list_dep_mngr[MAX_NUM_LAYERS][MAX_NUM_REF];
2999
3000     /** Ptr to all layer context placeholder for curr pic encoded */
3001     layers_descr_t *ps_curr_descr;
3002
3003     /**
3004      * ME uses ref id lc to search multi ref. This TLU gets POC of
3005      * the pic w.r.t. a given ref id
3006      */
3007     S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
3008
3009     /** POC of pic encoded just before current */
3010     S32 i4_prev_poc;
3011
3012     /** POC of current pic being encoded */
3013     S32 i4_curr_poc;
3014
3015     /** Buf mgr for memory allocation */
3016     buf_mgr_t s_buf_mgr;
3017
3018     /** MV Grid for L0 and L1, this is the active one used */
3019     mv_grid_t as_mv_grid[2];
3020
3021     /**
3022      * MV grid for FPEL and QPEL maintained separately. Depending on the
3023      * correct prediction res. being used, copy appropriate results to
3024      * the as_mv_grid structure
3025      */
3026     mv_grid_t as_mv_grid_fpel[2];
3027     mv_grid_t as_mv_grid_qpel[2];
3028
3029     /** Number of HME layers encode + non encode */
3030     S32 num_layers;
3031
3032     /** Alloc time parameter, max ref frms used for this session */
3033     S32 max_num_ref;
3034
3035     /**
3036      * Number of layers that use explicit search. Explicit search means
3037      * that each ref id is searched separately
3038      */
3039     S32 num_layers_explicit_search;
3040
3041     /**
3042      * Maximum number of results maintained at any refinement layer
3043      * search. Important from mem alloc perspective
3044      */
3045     S32 max_num_results;
3046
3047     /** Same as above but for coarse layer */
3048     S32 max_num_results_coarse;
3049
3050     /** Array of flags, one per layer indicating whether layer is encoded */
3051     U08 u1_encode[MAX_NUM_LAYERS];
3052
3053     /* Parameters used for lambda computation */
3054     frm_lambda_ctxt_t s_frm_lambda_ctxt;
3055
3056     /**
3057      * Array look up created each frm, maintaining the corresponding
3058      * layer descr look up for each ref id
3059      */
3060     S32 a_ref_to_descr_id[MAX_NUM_REF];
3061
3062     /**
3063      * Array lookup created each frame that maps a given ref id
3064      * pertaining to unified list to a L0/L1 list. Encoder searches in terms
3065      * of LC list or in other words does not differentiate between L0
3066      * and L1 frames for most of search. Finally to report results to
3067      * encoder, the ref id has to be remapped to suitable list
3068      */
3069     S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
3070     S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
3071
3072     /** Width and ht (single values here, not per-layer arrays) */
3073     S32 i4_wd;
3074     S32 i4_ht;
3075
3076     /** Histogram, one for each ref, allocated during create time */
3077     mv_hist_t *aps_mv_hist[MAX_NUM_REF];
3078
3079     /**
3080      * Back input requiring > 8  bit precision, allocated during
3081      * create time, storing 2I-P0 for Bidir refinement
3082      */
3083     S16 *pi2_inp_bck;
3084     ctb_boundary_attrs_t as_ctb_bound_attrs[NUM_CTB_BOUNDARY_TYPES]; /* one entry per CTB boundary type */
3085
3086     /** Whether a given ref id in Lc list is past frm or future frm */
3087     U08 au1_is_past[MAX_NUM_REF];
3088
3089     /** These are L0 and L1 lists, storing ref id Lc in them */
3090     S08 ai1_past_list[MAX_NUM_REF];
3091     S08 ai1_future_list[MAX_NUM_REF];
3092
3093     /** Number of past and future ref pics sent this frm */
3094     S32 num_ref_past;
3095     S32 num_ref_future;
3096
3097     /**
3098      * Passed by encoder, stored as void to avoid header file inclusion
3099      * of encoder wks into ME, these are frm prms passed by encoder,
3100      * pointers to ctbanalyse_t and cu_analyse_t structures and the
3101      * corresponding running ptrs
3102      */
3103     /* NOTE(review): the members below are typed pointers, not void * - the "stored as void" remark above looks stale */
3104     ctb_analyse_t *ps_ctb_analyse_base;
3105     cur_ctb_cu_tree_t *ps_cu_tree_base;
3106     me_ctb_data_t *ps_me_ctb_data_base;
3107     /* Current-row counterparts of the base pointers above (per member names) */
3108     ctb_analyse_t *ps_ctb_analyse_curr_row;
3109     cu_analyse_t *ps_cu_analyse_curr_row;
3110     cur_ctb_cu_tree_t *ps_cu_tree_curr_row;
3111     me_ctb_data_t *ps_me_ctb_data_curr_row;
3112
3113     /** Log2 of ctb size e.g. for 64 size, it will be 6 */
3114     S32 log_ctb_size;
3115     /* Frame level parameters for ME (pointer; a by-value backup is kept in s_frm_prms below) */
3116     hme_frm_prms_t *ps_hme_frm_prms;
3117     /* Reference mapping (pointer to hme_ref_map_t) */
3118     hme_ref_map_t *ps_hme_ref_map;
3119
3120     /**
3121      *  Scale factor of any given ref lc to another ref in Q8
3122      *  First MAX_NUM_REF entries are to scale an mv of ref id k
3123      *  w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
3124      *  Next MAX_NUM_REF entries are to scale mv of ref id 1 w.r.t. 0
3125      *  And so on
3126      */
3127     S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
3128
3129     /** bits for a given ref id, in either list L0/L1 */
3130     U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
3131
3132     /** Points to above: 1 ptr for each list */
3133     U08 *apu1_ref_bits_tlu_lc[2];
3134
3135     /**
3136      *  Frame level base pointer to L0 IPE ctb analyze structures.
3137      *  These structures include the following
3138      *
3139      *  1. Best costs and modes at all levels of CTB (CU=8,16,32,64)
3140      *  2. Recommended IPE intra CU sizes for this CTB size
3141      *  3. Early intra/inter decision structures for all 8x8 blocks of CTB
3142      *     populated by L1-ME and L1-IPE
3143      *
3144      */
3145     ipe_l0_ctb_analyse_for_me_t *ps_ipe_l0_ctb_frm_base;
3146
3147     /** array of ptrs to intra cost per layer encoded, stored at 8x8 */
3148     double *apd_intra_cost[MAX_NUM_LAYERS];
3149
3150     /** number of b frames between P, depends on number of hierarchy layers */
3151     S32 num_b_frms;
3152
3153     /** Frame level qp passed every frame by ME's caller */
3154     S32 frm_qstep;
3155
3156     /** Frame level qp with higher precision : left shifted by 8 */
3157     S32 qstep_ls8;
3158
3159     /** Backup of frame parameters */
3160     hme_frm_prms_t s_frm_prms;
3161
3162     /** Weighted prediction parameters for all references are stored
3163      *  Scratch buffers for populated weighted inputs are also stored in this
3164      */
3165     wgt_pred_ctxt_t s_wt_pred;
3166
3167     /** Weighted pred enable flag */
3168     S32 i4_wt_pred_enable_flag;
3169
3170     /** Results of 16 16x16 blks within a CTB used in enc layer */
3171     inter_cu_results_t as_cu16x16_results[16];
3172
3173     /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
3174     inter_cu_results_t as_cu32x32_results[4];
3175
3176     /** Same as above but for 64x64 blk */
3177     inter_cu_results_t s_cu64x64_results;
3178
3179     /** Results of 64 8x8 blks within a CTB used in enc layer */
3180     inter_cu_results_t as_cu8x8_results[64];
3181     /* Flag: whether the previous frame is a reference (per member name) */
3182     WORD32 i4_is_prev_frame_reference;
3183     /* Rate control quant context (per type name) */
3184     rc_quant_t *ps_rc_quant_ctxt;
3185
3186     /** Dynamical Search Range parameters, NUM_SG_INTERLEAVED entries */
3187     l0_dyn_range_prms_t as_l0_dyn_range_prms[NUM_SG_INTERLEAVED];
3188
3189     /** Dependency manager for Row level sync in L0 ME pass */
3190     void *pv_dep_mngr_l0_me_sync;
3191
3192     /** Pointer to structure containing function pointers of encoder*/
3193     me_func_selector_t *ps_func_selector;
3194     /* Cluster data for 16x16, 32x32 and 64x64 blocks and for the CTB (per type names) */
3195     cluster_16x16_blk_t *ps_blk_16x16;
3196
3197     cluster_32x32_blk_t *ps_blk_32x32;
3198
3199     cluster_64x64_blk_t *ps_blk_64x64;
3200
3201     ctb_cluster_info_t *ps_ctb_cluster_info;
3202     /* Context for full-pel refinement (per type name) */
3203     fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
3204
3205     /* thread id of the current context */
3206     WORD32 thrd_id;
3207
3208     /* dependency manager for forward ME sync */
3209     void *pv_dep_mngr_encloop_dep_me;
3210     WORD32 i4_l0me_qp_mod;
3211
3212     /* MC ctxt to reuse the luma inter pred function
3213     for the purpose of TRAQO*/
3214     inter_pred_me_ctxt_t s_mc_ctxt;
3215     /* Rate control pass. NOTE(review): value encoding is not visible in this header */
3216     WORD32 i4_rc_pass;
3217     /*pic type*/
3218     WORD32 i4_pic_type;
3219     /* Temporal layer (per member name) */
3220     WORD32 i4_temporal_layer;
3221     /* NOTE(review): what this counter counts is not evident from this header */
3222     WORD32 i4_count;
3223     /* Flag selecting a constant lambda modifier (per member name) */
3224     WORD32 i4_use_const_lamda_modifier;
3225     /* Lambda modifier for I pics (per member name); note the type is double despite the f_ prefix */
3226     double f_i_pic_lamda_modifier;
3227     /* Flag: whether the current frame is a reference frame (per member name) */
3228     UWORD8 u1_is_curFrame_a_refFrame;
3229
3230     /* src_var related variables: sigmaX and sigmaXSquared for 4x4 (per names), MAX_NUM_SIGMAS_4x4 entries each */
3231     U32 au4_4x4_src_sigmaX[MAX_NUM_SIGMAS_4x4];
3232     U32 au4_4x4_src_sigmaXSquared[MAX_NUM_SIGMAS_4x4];
3233 } me_frm_ctxt_t;
3234 
3235 /**
3236 ******************************************************************************
3237  *  @struct  me_ctxt_t
3238  *  @brief   Handle for ME
3239 ******************************************************************************
3240  */
3241 typedef struct /* ME handle: init params, per-frame contexts, layer descriptors and function lists */
3242 {
3243     /** Init prms sent by encoder during create time */
3244     hme_init_prms_t s_init_prms;
3245
3246     /** Not used in encoder, relevant to test bench */
3247     U08 *pu1_debug_out;
3248     /* Opaque frame params pointer. NOTE(review): pointee type and owner are not visible in this header */
3249     void *pv_ext_frm_prms;
3250
3251     /* Frame level ME ctxts: one pointer per parallel ME frame (MAX_NUM_ME_PARALLEL) */
3252     me_frm_ctxt_t *aps_me_frm_prms[MAX_NUM_ME_PARALLEL];
3253
3254     /** Layer ctxt place holders for all pics (array held by value; twice DEFAULT_MAX_REFERENCE_PICS per parallel ME frame, plus one) */
3255     /** number of reference descriptors should be equal to max number of active references **/
3256     layers_descr_t as_ref_descr[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1];
3257
3258     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
3259     void *pv_me_optimised_function_list;
3260     /* Pointer to the common-utils optimised function list */
3261     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
3262
3263     /* Pointer to Tile params base */
3264     void *pv_tile_params_base;
3265
3266 } me_ctxt_t;
3267 
3268 typedef struct /* Master context for coarse ME: per-thread coarse_me_ctxt_t pointers plus shared memtabs, frame params and function lists */
3269 {
3270     /** array of context pointers, one for each thread (MAX_NUM_FRM_PROC_THRDS_PRE_ENC entries) */
3271     coarse_me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
3272
3273     /** memtabs storage memory */
3274     hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
3275
3276     /** Frame level parameters for ME */
3277     hme_frm_prms_t s_frm_prms;
3278
3279     /** Holds all reference mapping */
3280     hme_ref_map_t s_ref_map;
3281
3282     /** number of threads created at run time */
3283     WORD32 i4_num_proc_thrds;
3284
3285     /** Dependency manager for Row level sync in HME pass */
3286     /* Note : Indexing should be like layer_id - 1        */
3287     void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
3288     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
3289     void *pv_me_optimised_function_list;
3290     /* ihevce_cmn_opt_func_t instance held by value (coarse_me_ctxt_t carries a pointer of this type) */
3291     ihevce_cmn_opt_func_t s_cmn_opt_func;
3292 } coarse_me_master_ctxt_t;
3293 
3294 typedef struct /* Master context for ME: per-thread me_ctxt_t pointers plus shared memtabs, frame params and function tables */
3295 {
3296     /** array of context pointers, one for each thread (MAX_NUM_FRM_PROC_THRDS_ENC entries) */
3297     me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
3298
3299     /** memtabs storage memory */
3300     hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
3301
3302     /** Frame level parameters for ME, one per parallel ME frame */
3303     hme_frm_prms_t as_frm_prms[MAX_NUM_ME_PARALLEL];
3304
3305     /** Holds all reference mapping, one per parallel ME frame */
3306     hme_ref_map_t as_ref_map[MAX_NUM_ME_PARALLEL];
3307
3308     /** number of threads created at run time */
3309     WORD32 i4_num_proc_thrds;
3310
3311     /** number of me frames running in parallel */
3312     WORD32 i4_num_me_frm_pllel;
3313
3314     /** Structure containing function pointers of encoder (held by value here; me_frm_ctxt_t keeps a pointer of this type) */
3315     me_func_selector_t s_func_selector;
3316     /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
3317     void *pv_me_optimised_function_list;
3318     /* ihevce_cmn_opt_func_t instance held by value (me_ctxt_t carries a pointer of this type) */
3319     ihevce_cmn_opt_func_t s_cmn_opt_func;
3320
3321     /* Pointer to Tile params base */
3322     void *pv_tile_params_base;
3323
3324 } me_master_ctxt_t;
3325 
3326 typedef struct /* Dedup enabler record: an MV, a ref idx and a node map (per member names) */
3327 { /* MV x component */
3328     S16 i2_mv_x;
3329     /* MV y component */
3330     S16 i2_mv_y;
3331     /* Reference index */
3332     U08 u1_ref_idx;
3333     /* 2 * MAP_Y_MAX words. NOTE(review): presumably a bitmap of already-evaluated nodes - confirm against users */
3334     U32 au4_node_map[2 * MAP_Y_MAX];
3335
3336 } subpel_dedup_enabler_t;
3337 /* hme_dedup_enabler_t is an alias of subpel_dedup_enabler_t */
3338 typedef subpel_dedup_enabler_t hme_dedup_enabler_t;
3339 
3340 typedef struct /* Inputs for initialising fpel search candidates (per type name) */
3341 { /* Layer contexts: current layer and coarse layer (per member names) */
3342     layer_ctxt_t *ps_curr_layer;
3343
3344     layer_ctxt_t *ps_coarse_layer;
3345     /* NOTE(review): presumably receives/holds the number of fpel search candidates - confirm direction and indexing */
3346     U08 *pu1_num_fpel_search_cands;
3347     /* Maps from ref id in the LC list to L0 and L1 ref ids (per member names) */
3348     S32 *pi4_ref_id_lc_to_l0_map;
3349
3350     S32 *pi4_ref_id_lc_to_l1_map;
3351     /* Position x / y. NOTE(review): units and origin are not visible in this header */
3352     S32 i4_pos_x;
3353
3354     S32 i4_pos_y;
3355     /* Number of active refs in L0 and in L1 (per member names) */
3356     S32 i4_num_act_ref_l0;
3357
3358     S32 i4_num_act_ref_l1;
3359     /* Pointer to the search candidates */
3360     search_candt_t *ps_search_cands;
3361     /* Index of the search candidate list (per member name) */
3362     U08 u1_search_candidate_list_index;
3363     /* Maximum number of init candidates (per member name) */
3364     S32 i4_max_num_init_cands;
3365     /* Prediction direction. NOTE(review): value encoding is not visible in this header */
3366     U08 u1_pred_dir;
3367
3368     /* Indicates the position of the current predDir in the processing order of predDir */
3369     U08 u1_pred_dir_ctr;
3370
3371     /* The following 4 flags apply exclusively to spatial candidates */
3372     U08 u1_is_topRight_available;
3373
3374     U08 u1_is_topLeft_available;
3375
3376     U08 u1_is_top_available;
3377
3378     U08 u1_is_left_available;
3379     /* Default ref id and alternate default ref id (per member names) */
3380     S08 i1_default_ref_id;
3381
3382     S08 i1_alt_default_ref_id;
3383     /* Number of results in the mv bank (per member name) */
3384     U08 u1_num_results_in_mvbank;
3385     /* Block size used for the search (per member name) */
3386     BLK_SIZE_T e_search_blk_size;
3387
3388 } fpel_srch_cand_init_data_t;
3389 
3390 typedef struct /* Describes one prediction buffer: pointer, stride and an array id */
3391 { /* Pointer to the prediction buffer */
3392     U08 *pu1_pred;
3393     /* Stride of the prediction buffer */
3394     S32 i4_pred_stride;
3395     /* NOTE(review): presumably the index of this buffer within a pred buffer array - confirm against users */
3396     U08 u1_pred_buf_array_id;
3397
3398 } hme_pred_buf_info_t;
3399 
3400 /*****************************************************************************/
3401 /* Typedefs                                                                  */
3402 /*****************************************************************************/
3403 typedef void (*PF_SAD_FXN_T)(err_prms_t *); /* SAD function pointer: takes err_prms_t *, returns nothing */
3404 /* SAD + result function pointer: takes err_prms_t * and result_upd_prms_t *, returns nothing */
3405 typedef void (*PF_SAD_RESULT_FXN_T)(err_prms_t *, result_upd_prms_t *ps_result_prms);
3406 /* Returns WORD32; takes err prms, lambda, lambda q shift, frame qstep and the ME function selector. NOTE(review): meaning of the return value is not visible here */
3407 typedef WORD32 (*PF_SAD_FXN_TU_REC)(
3408     err_prms_t *,
3409     WORD32 lambda,
3410     WORD32 lamda_q_shift,
3411     WORD32 i4_frm_qstep,
3412     me_func_selector_t *ps_func_selector);
3413 /* Result update function pointer: takes result_upd_prms_t *, returns nothing */
3414 typedef void (*PF_RESULT_FXN_T)(result_upd_prms_t *);
3415 /* Combined SAD + result function pointer. NOTE(review): parameters are unnamed; roles of the trailing U08 ** and S32 are not visible here */
3416 typedef void (*PF_CALC_SAD_AND_RESULT)(
3417     hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32);
3418 
3419 #endif /* _HME_DEFS_H_ */
3420