1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file hme_utils.h
23 *
24 * \brief
25 *    Prototypes for various utilities used by coarse/refinement/subpel functions
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _HME_UTILS_H_
37 #define _HME_UTILS_H_
38 
39 /*****************************************************************************/
40 /* Functions                                                                 */
41 /*****************************************************************************/
42 
43 /**
44 ********************************************************************************
45 *  @fn     hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
46 *
47 *  @brief  Initialises the mv histogram structure for the given maximum mv range.
48 *          NOTE(review): the exact initialisation done is not visible here - confirm.
49 *
50 *  @param[in,out]  ps_hist : the histogram structure
51 *
52 *  @param[in]  i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
53 *
54 *  @param[in]  i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
55 *
56 *  @return None
57 ********************************************************************************
58 */
59 void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y);
60 
61 /**
62 ********************************************************************************
63 *  @fn     hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
64 *
65 *  @brief  Updates the histogram given an mv entry
66 *
67 *  @param[in,out]  ps_hist : the histogram structure
68 *
69 *  @param[in]  i4_mv_x : x component of the mv (fpel units)
70 *
71 *  @param[in]  i4_mv_y : y component of the mv (fpel units)
72 *
73 *  @return None
74 ********************************************************************************
75 */
76 void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y);
77 
78 /**
79 ********************************************************************************
80 *  @fn     hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
81 *
82 *  @brief  Returns the global mv of a previous picture. Accounts for the fact
83 *          that the delta poc of the previous picture may have been different
84 *          from delta poc of current picture. Delta poc is POC difference
85 *          between a picture and its reference.
86 *
87 *  @param[in]  ps_prev_layer: layer ctxt of the previous picture (presumably holds its global mv - TODO confirm)
88 *  @param[out]  ps_mv: hme_mv_t structure where the motion vector is returned
89 *  @param[in]  i4_delta_poc: the delta poc for the current pic w.r.t. reference
90 *
91 *  @return None
92 ********************************************************************************
93 */
94 void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc);
95 
96 /**
97 ********************************************************************************
98 *  @fn     hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
99 *
100 *  @brief  Calculates global mv for a given histogram
101 *
102 *  @param[in]  ps_hist : the histogram structure
103 *
104 *  @param[out]  ps_mv : used to return the global mv
105 *
106 *  @param[in]  e_lobe_type : refer to GMV_MVTYPE_T
107 *
108 *  @return None
109 ********************************************************************************
110 */
111 void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type);
112 
113 /**
114 ********************************************************************************
115 *  @fn     hme_collate_fpel_results(search_results_t *ps_search_results,
116 *           S08 i1_ref_idx, S08 i1_idx_to_merge)
117 *
118 *  @brief  After full pel search and result seeding in every search iteration
119 *          results, this function is called to collapse the results of a given
120 *          search iteration into another.
121 *
122 *  @param[in,out] ps_search_results : Search results data structure
123 *  @param[in]     i1_ref_idx: id of the search iteration where the results
124 *                   will be collapsed
125 *  @param[in]     i1_idx_to_merge : id of the search iteration from which the
126 *                   results are picked up.
127 *
128 *
129 *  @return None
130 ********************************************************************************
131 */
132 void hme_collate_fpel_results(
133     search_results_t *ps_search_results, S08 i1_ref_idx, S08 i1_idx_to_merge);
134 
135 /**
136 ********************************************************************************
137 *  @fn     hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
138 *           search_results_t *ps_search_results, U08 *pu1_pred_dir_searched, S32 i4_num_pred_dir)
139 *
140 *  @brief  For a given CU whose results are in ps_search_results, the 17x17
141 *          mv grid is updated for future use within the CTB
142 *
143 *  @param[out] pps_mv_grid: The mv grids to update (NOTE(review): count presumably one per pred dir - confirm)
144 *
145 *  @param[in] ps_search_results : Search results data structure
146 *
147 *  @param[in]  pu1_pred_dir_searched: presumably the list of prediction directions searched - TODO confirm
148 *
149 *  @param[in]  i4_num_pred_dir: presumably the number of entries in pu1_pred_dir_searched - TODO confirm
150 *
151 *  @return None
152 ********************************************************************************
153 */
154 void hme_map_mvs_to_grid(
155     mv_grid_t **pps_mv_grid,
156     search_results_t *ps_search_results,
157     U08 *pu1_pred_dir_searched,
158     S32 i4_num_pred_dir);
159 
160 /**
161 ********************************************************************************
162 *  @fn     hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
163 *
164 *  @brief  Expands the part mask to a list of valid part ids terminated by -1
165 *
166 *  @param[in] i4_part_mask : bit mask of active partition ids
167 *
168 *  @param[out] pi4_valid_part_ids : array, each entry has one valid part id
169 *               Terminated by -1 to signal end.
170 *
171 *  @return number of partitions
172 ********************************************************************************
173 */
174 S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids);
175 
176 /**
177 ********************************************************************************
178 *  @fn     get_num_blks_in_ctb(S32 i4_ctb_x,
179 *                       S32 i4_ctb_y,
180 *                       S32 i4_pic_wd,
181 *                       S32 i4_pic_ht,
182 *                       S32 i4_blk_size)
183 *
184 *  @brief  returns the number of blks in the ctb (64x64 ctb)
185 *
186 *  @param[in] i4_ctb_x : pixel x offset of the top left corner of ctb in pic
187 *
188 *  @param[in] i4_ctb_y : pixel y offset of the top left corner of ctb in pic
189 *
190 *  @param[in] i4_pic_wd : width of the picture in pixels
191 *
192 *  @param[in] i4_pic_ht : height of the picture in pixels
193 *
194 *  @param[in] i4_blk_size : Size of the blk in pixels
195 *
196 *  @return number of blks in the ctb
197 ********************************************************************************
198 */
199 S32 get_num_blks_in_ctb(S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_pic_wd, S32 i4_pic_ht, S32 i4_blk_size);
200 
201 /**
202 ********************************************************************************
203 *  @fn     hevc_avg_2d(U08 *pu1_src1,
204 *                   U08 *pu1_src2,
205 *                   S32 i4_src1_stride,
206 *                   S32 i4_src2_stride,
207 *                   S32 i4_blk_wd,
208 *                   S32 i4_blk_ht,
209 *                   U08 *pu1_dst,
210 *                   S32 i4_dst_stride)
211 *
212 *
213 *  @brief  Point-wise average of two source buffers, written into a third buffer
214 *
215 *  @param[in] pu1_src1 : first source buffer
216 *
217 *  @param[in] pu1_src2 : second source buffer
218 *
219 *  @param[in] i4_src1_stride : stride of the first source buffer
220 *
221 *  @param[in] i4_src2_stride : stride of the second source buffer
222 *
223 *  @param[in] i4_blk_wd : block width
224 *
225 *  @param[in] i4_blk_ht : block height
226 *
227 *  @param[out] pu1_dst : destination buffer
228 *
229 *  @param[in] i4_dst_stride : stride of the destination buffer
230 *
231 *  @return void
232 ********************************************************************************
233 */
234 void hevc_avg_2d(
235     U08 *pu1_src1,
236     U08 *pu1_src2,
237     S32 i4_src1_stride,
238     S32 i4_src2_stride,
239     S32 i4_blk_wd,
240     S32 i4_blk_ht,
241     U08 *pu1_dst,
242     S32 i4_dst_stride);
243 
244 /**
245 ********************************************************************************
246 *  @fn     hme_pick_back_search_node(search_results_t *ps_search_results,
247 *                                   search_node_t *ps_search_node_fwd,
248 *                                   S32 i4_part_idx,
249 *                                   layer_ctxt_t *ps_curr_layer)
250 *
251 *
252 *  @brief  Returns the search node corresponding to a ref idx in same or
253 *          opp direction. Preference is given to opp direction, but if that
254 *          does not yield results, same direction is attempted.
255 *
256 *  @param[in] ps_search_results: search results overall
257 *
258 *  @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
259 *
260 *  @param[in] i4_part_idx : partition id
261 *
262 *  @param[in] ps_curr_layer : layer context for current layer.
263 *
264 *  @return search node corresponding to the "other direction"
265 ********************************************************************************
266 */
267 search_node_t *hme_pick_back_search_node(
268     search_results_t *ps_search_results,
269     search_node_t *ps_search_node_fwd,
270     S32 i4_part_idx,
271     layer_ctxt_t *ps_curr_layer);
272 
273 /**
274 ********************************************************************************
275 *  @fn     hme_study_input_segmentation(U08 *pu1_inp,
276 *                                       S32 i4_inp_stride,
277 *                                       S32 limit_active_partitions)
278 *
279 *
280 *  @brief  Examines input 16x16 for possible edges and their orientations,
281 *          and returns a bit mask of the partitions that should be searched
282 *
283 *  @param[in] pu1_inp : input buffer
284 *
285 *  @param[in] i4_inp_stride: input stride
286 *
287 *  @param[in] limit_active_partitions : 1: Edge algo done and partitions are
288 *               limited, 0 : Brute force, all partitions considered
289 *
290 *  @return part mask (bit mask of active partitions to search)
291 ********************************************************************************
292 */
293 S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions);
294 
295 /**
296 ********************************************************************************
297 *  @fn     hme_init_search_results(search_results_t *ps_search_results,
298 *                           S32 i4_num_ref,
299 *                           S32 i4_num_best_results,
300 *                           S32 i4_num_results_per_part,
301 *                           BLK_SIZE_T e_blk_size,
302 *                           S32 i4_x_off, S32 i4_y_off,
303 *                           U08 *pu1_is_past)
304 *
305 *  @brief  Initializes the search results structure with some key attributes
306 *
307 *  @param[out] ps_search_results : search results structure to initialise
308 *
309 *  @param[in] i4_num_ref: corresponds to the number of ref ids searched
310 *
311 *  @param[in] i4_num_best_results: Number of best results for the CU to
312 *               be maintained in the result structure
313 *
314 *  @param[in] i4_num_results_per_part: Per active partition the number of best
315 *               results to be maintained
316 *
317 *  @param[in] e_blk_size: blk size of the CU for which this structure used
318 *
319 *  @param[in] i4_x_off: x offset of the top left of CU from CTB top left
320 *  @param[in] i4_y_off: y offset of the top left of CU from CTB top left
321 *  @param[in] pu1_is_past: NOTE(review): undocumented; presumably per-ref "is in the past" flags - confirm
322 *
323 *  @return void
324 ********************************************************************************
325 */
326 void hme_init_search_results(
327     search_results_t *ps_search_results,
328     S32 i4_num_ref,
329     S32 i4_num_best_results,
330     S32 i4_num_results_per_part,
331     BLK_SIZE_T e_blk_size,
332     S32 i4_x_off,
333     S32 i4_y_off,
334     U08 *pu1_is_past);
335 
336 /**
337 ********************************************************************************
338 *  @fn     hme_reset_search_results(search_results_t *ps_search_results,
339 *                               S32 i4_part_mask, S32 mv_res)
340 *
341 *
342 *  @brief  Resets the best results to maximum values, so as to allow search
343 *          for the new CU's partitions. The existing results may be from an
344 *          older CU using same structure.
345 *
346 *  @param[in,out] ps_search_results: search results structure
347 *
348 *  @param[in] i4_part_mask : bit mask of active partitions
349 *
350 *  @param[in] mv_res : Resolution of the mv predictors (fpel/qpel)
351 *
352 *  @return void
353 ********************************************************************************
354 */
355 void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res);
356 
357 /**
358 ********************************************************************************
359 *  @fn     hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
360 *                               S32 i4_step,
361 *                               range_prms_t *ps_mvrange)
362 *
363 *  @brief  Given a central point within mv range, and a grid of points surrounding
364 *           this point, returns a grid mask of the points within search range
365 *
366 *  @param[in] ps_search_node: the centre point of the grid
367 *
368 *  @param[in] i4_step: step size of grid
369 *
370 *  @param[in] ps_mvrange: structure containing the current mv range
371 *
372 *  @return bitmask of the points in grid within search range
373 ********************************************************************************
374 */
375 S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange);
376 
377 /**
378 ********************************************************************************
379 *  @fn    layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
380 *             me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
381 *
382 *  @brief  returns the layer ctxt of the given layer id from the temporally previous frame
383 *
384 *  @param[in] ps_ctxt : ME context
385 *  @param[in] ps_frm_ctxt : ME frame context (NOTE(review): role undocumented - confirm)
386 *  @param[in] i4_layer_id : id of layer required
387 *  @param[in] i4_num_me_frm_pllel : presumably number of ME frames processed in parallel - TODO confirm
388 *
389 *  @return layer ctxt of given layer id in temporally previous frame
390 ********************************************************************************
391 */
392 layer_ctxt_t *hme_get_past_layer_ctxt(
393     me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel);
394 
395 layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id); /* NOTE(review): undocumented; by name the coarse-ME counterpart of hme_get_past_layer_ctxt - confirm */
396 
397 /**
398 ********************************************************************************
399 *  @fn    void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
400 *                       BLK_SIZE_T e_blk_size,
401 *                       S32 i4_num_ref,
402 *                       S32 i4_num_results_per_part, U08 u1_enc)
403 *
404 *  @brief  Given a blk size to be used for this layer, this function initializes
405 *          the mv bank to make it ready to store and return results.
406 *
407 *  @param[in,out] ps_layer_ctxt: pointer to layer ctxt
408 *
409 *  @param[in] e_blk_size : resolution at which mvs are stored
410 *
411 *  @param[in] i4_num_ref: number of reference frames corresponding to which
412 *              results are stored.
413 *
414 *  @param[in] i4_num_results_per_part : Number of results to be stored per
415 *               ref idx. So these many best results stored
416 *
417 *  @param[in] u1_enc : NOTE(review): undocumented flag; presumably marks an encode layer - confirm
418 *
419 *  @return void
420 ********************************************************************************
421 */
422 void hme_init_mv_bank(
423     layer_ctxt_t *ps_layer_ctxt,
424     BLK_SIZE_T e_blk_size,
425     S32 i4_num_ref,
426     S32 i4_num_results_per_part,
427     U08 u1_enc);
428 
429 /**
430 ********************************************************************************
431 *  @fn    void hme_derive_search_range(range_prms_t *ps_range,
432 *                                   range_prms_t *ps_pic_limit,
433 *                                   range_prms_t *ps_mv_limit,
434 *                                   S32 i4_x,
435 *                                   S32 i4_y,
436 *                                   S32 blk_wd,
437 *                                   S32 blk_ht)
438 *
439 *  @brief  Given picture limits, blk dimensions and mv search limits, obtains
440 *          the valid search range such that the blk stays within pic boundaries,
441 *          where picture boundaries include padded portions of picture
442 *
443 *  @param[out] ps_range: updated with actual search range
444 *
445 *  @param[in] ps_pic_limit : picture boundaries
446 *
447 *  @param[in] ps_mv_limit: Search range limits for the mvs
448 *
449 *  @param[in] i4_x : x coordinate of the blk
450 *
451 *  @param[in] i4_y : y coordinate of the blk
452 *
453 *  @param[in] blk_wd : blk width
454 *
455 *  @param[in] blk_ht : blk height
456 *
457 *  @return void
458 ********************************************************************************
459 */
460 void hme_derive_search_range(
461     range_prms_t *ps_range,
462     range_prms_t *ps_pic_limit,
463     range_prms_t *ps_mv_limit,
464     S32 i4_x,
465     S32 i4_y,
466     S32 blk_wd,
467     S32 blk_ht);
468 
469 /**
470 ********************************************************************************
471 *  @fn    void hme_get_spatial_candt(layer_ctxt_t *ps_curr_layer,
472 *                                   BLK_SIZE_T e_search_blk_size,
473 *                                   S32 blk_x,
474 *                                   S32 blk_y,
475 *                                   S08 i1_ref_idx,
476 *                                   search_node_t *ps_top_neighbours,
477 *                                   search_node_t *ps_left_neighbours,
478 *                                   S32 i4_result_id, S32 i4_tr_avail, S32 i4_bl_avail, S32 encode)
479 *
480 *  @brief  Obtains top, top left, top right, left and bottom left candts
481 *
482 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
483 *  @param[in] e_search_blk_size : search blk size of current layer
484 *  @param[in] blk_x : x coordinate of the block in mv bank
485 *  @param[in] blk_y : y coordinate of the block in mv bank
486 *
487 *  @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
488 *              results, useful if multiple ref idx candts maintained separately.
489 *
490 *  @param[out] ps_top_neighbours : T, TL, TR candts are output here
491 *  @param[out] ps_left_neighbours : L, BL candts are output here
492 *
493 *  @param[in] i4_result_id : If multiple results stored per ref idx, this
494 *              pts to the id of the result
495 *
496 *  @param[in] i4_tr_avail : presumably non-zero when the top right candt is available - TODO confirm
497 *  @param[in] i4_bl_avail : presumably non-zero when the bottom left candt is available - TODO confirm
498 *  @param[in] encode : NOTE(review): undocumented flag; presumably marks an encode layer - confirm
499 *
500 *  @return void
501 ********************************************************************************
502 */
503 void hme_get_spatial_candt(
504     layer_ctxt_t *ps_curr_layer,
505     BLK_SIZE_T e_search_blk_size,
506     S32 blk_x,
507     S32 blk_y,
508     S08 i1_ref_idx,
509     search_node_t *ps_top_neighbours,
510     search_node_t *ps_left_neighbours,
511     S32 i4_result_id,
512     S32 i4_tr_avail,
513     S32 i4_bl_avail,
514     S32 encode);
515 
516 void hme_get_spatial_candt_in_l1_me( /* NOTE(review): undocumented; by name the L1-ME variant of hme_get_spatial_candt - confirm */
517     layer_ctxt_t *ps_curr_layer,
518     BLK_SIZE_T e_search_blk_size,
519     S32 i4_blk_x,
520     S32 i4_blk_y,
521     S08 i1_ref_idx,
522     U08 u1_pred_dir, /* presumably the prediction direction to pick candts from - TODO confirm */
523     search_node_t *ps_top_neighbours,
524     search_node_t *ps_left_neighbours,
525     S32 i4_result_id,
526     S32 tr_avail,
527     S32 bl_avail,
528     S32 i4_num_act_ref_l0, /* presumably number of active references in L0 - TODO confirm */
529     S32 i4_num_act_ref_l1); /* presumably number of active references in L1 - TODO confirm */
530 
531 /**
532 ********************************************************************************
533 *  @fn    void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
534 *                                   S32 blk_x,
535 *                                   S32 blk_y,
536 *                                   mv_grid_t *ps_mv_grid,
537 *                                   U08 u1_pred_dir_ctr, U08 u1_default_ref_id, S32 i4_num_act_ref_l0)
538 *
539 *  @brief  The 18x18 MV grid for a ctb is filled in its first row and first col;
540 *          these correspond to the neighbours (TL, T, TR, L, BL)
541 *
542 *  @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
543 *  @param[in] blk_x : x coordinate of the block in mv bank
544 *  @param[in] blk_y : y coordinate of the block in mv bank
545 *
546 *  @param[in,out] ps_mv_grid : Grid (18x18 mvs at 4x4 level) whose first row/col is filled
547 *
548 *  @param[in] u1_pred_dir_ctr : presumably the pred dir from which to pick up mv
549 *              results - TODO confirm
550 *  @param[in] u1_default_ref_id : NOTE(review): undocumented; presumably a fallback ref id - confirm
551 *  @param[in] i4_num_act_ref_l0 : presumably number of active references in L0 - TODO confirm
552 *
553 *  @return void
554 ********************************************************************************
555 */
556 void hme_fill_ctb_neighbour_mvs(
557     layer_ctxt_t *ps_curr_layer,
558     S32 blk_x,
559     S32 blk_y,
560     mv_grid_t *ps_mv_grid,
561     U08 u1_pred_dir_ctr,
562     U08 u1_default_ref_id,
563     S32 i4_num_act_ref_l0);
564 
565 /**
566 ********************************************************************************
567 *  @fn     void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
568 *
569 *  @brief  Allocates a block of size = i4_size from working memory and returns it
570 *
571 *  @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
572 *
573 *  @param[in]  i4_size : size required
574 *
575 *  @return void pointer to allocated memory, NULL if failure
576 ********************************************************************************
577 */
578 void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size);
579 
580 void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr); /* NOTE(review): undocumented; presumably resets the wkg memory buffer manager - confirm */
581 
582 void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size); /* NOTE(review): undocumented; presumably attaches pu1_mem (size bytes) to the wkg memory buffer manager - confirm */
583 
584 void hme_reset_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr); /* NOTE(review): undocumented; presumably resets the ctb memory manager - confirm */
585 
586 void hme_init_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr, U08 *pu1_mem, S32 size); /* NOTE(review): undocumented; presumably attaches pu1_mem (size bytes) to the ctb memory manager - confirm */
587 
588 void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt); /* NOTE(review): undocumented; by name, fills the layer's mv bank with intra entries - confirm */
589 
590 void hme_scale_mv_grid(mv_grid_t *ps_mv_grid); /* NOTE(review): undocumented; by name, scales the mvs held in the grid (factor not visible here) - confirm */
591 
592 void hme_downscale_mv_grid(mv_grid_t *ps_mv_grid); /* NOTE(review): undocumented; by name, downscales the mvs held in the grid (factor not visible here) - confirm */
593 
594 void hme_create_parent_ctb( /* NOTE(review): undocumented; by the parameter names, builds a parent ctb node from four child nodes - confirm */
595     ctb_node_t *ps_ctb_node_parent,
596     ctb_node_t *ps_ctb_child_tl, /* presumably top-left child */
597     ctb_node_t *ps_ctb_child_tr, /* presumably top-right child */
598     ctb_node_t *ps_ctb_child_bl, /* presumably bottom-left child */
599     ctb_node_t *ps_ctb_child_br, /* presumably bottom-right child */
600     CU_SIZE_T e_cu_size_parent,
601     buf_mgr_t *ps_buf_mgr);
602 
603 void hme_create_merged_ctbs( /* NOTE(review): undocumented; by name, creates ctb nodes from merged search results - confirm */
604     search_results_t *ps_results_merged,
605     ctb_mem_mgr_t *ps_ctb_mem_mgr,
606     buf_mgr_t *ps_buf_mgr,
607     ctb_node_t **pps_ctb_list_unified,
608     S32 num_candts);
609 
610 void hme_init_mv_grid(mv_grid_t *ps_mv_grid); /* NOTE(review): undocumented; by name, initialises the mv grid - confirm */
611 
612 typedef void (*pf_get_wt_inp)( /* Function-pointer type. NOTE(review): by name, a routine that gets the weighted input - confirm against users */
613     layer_ctxt_t *ps_curr_layer,
614     wgt_pred_ctxt_t *ps_wt_inp_prms,
615     S32 dst_stride,
616     S32 pos_x,
617     S32 pos_y,
618     S32 size,
619     S32 num_ref,
620     U08 u1_is_wt_pred_on);
621 
622 /**
623 ********************************************************************************
624 *  @fn    void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
625 *
626 *  @brief  Pads horizontally to left side. Each pixel replicated across a line
627 *
628 *  @param[in,out] pu1_dst : destination pointer. Points to the pixel to be repeated
629 *
630 *  @param[in] stride : stride of destination buffer
631 *
632 *  @param[in] pad_wd : Amt of horizontal padding to be done
633 *
634 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
635 *
636 *  @return void
637 ********************************************************************************
638 */
639 void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
640 
641 /**
642 ********************************************************************************
643 *  @fn    void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
644 *
645 *  @brief  Pads horizontally to right side. Each pixel replicated across a line
646 *
647 *  @param[in,out] pu1_dst : destination pointer. Points to the pixel to be repeated
648 *
649 *  @param[in] stride : stride of destination buffer
650 *
651 *  @param[in] pad_wd : Amt of horizontal padding to be done
652 *
653 *  @param[in] pad_ht : Number of lines for which horizontal padding to be done
654 *
655 *  @return void
656 ********************************************************************************
657 */
658 void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
659 
660 /**
661 ********************************************************************************
662 *  @fn    void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
663 *
664 *  @brief  Pads vertically on the top. Repeats the top line for top padding
665 *
666 *  @param[in,out] pu1_dst : destination pointer. Points to the line to be repeated
667 *
668 *  @param[in] stride : stride of destination buffer
669 *
670 *  @param[in] pad_ht : Amt of vertical padding to be done
671 *
672 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
673 *
674 *  @return void
675 ********************************************************************************
676 */
677 void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
678 
679 /**
680 ********************************************************************************
681 *  @fn    void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
682 *
683 *  @brief  Pads vertically on the bot. Repeats the line at pu1_dst for bottom padding
684 *
685 *  @param[in,out] pu1_dst : destination pointer. Points to the line to be repeated
686 *
687 *  @param[in] stride : stride of destination buffer
688 *
689 *  @param[in] pad_ht : Amt of vertical padding to be done
690 *
691 *  @param[in] pad_wd : Number of columns for which vertical padding to be done
692 *
693 *  @return void
694 ********************************************************************************
695 */
696 void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
697 
698 /**
699 **************************************************************************************************
700 *  @fn     hme_populate_pus (see the prototype below for the full parameter list)
701 *
702 *  @brief  Populates the pu_results structure with the results after the subpel refinement
703 *
704 *          This is called post subpel refinement for 16x16s, 8x8s and
705 *          for post merge evaluation for 32x32, 64x64 CUs
706 *
707 *  @param[in,out] ps_search_results : Search results data structure
708 *  @param         ps_cu_results : cu_results data structure
709 *  @param         ps_pu_result  : Pointer to the memory for storing PU's
710 *          NOTE(review): the remaining parameters are undocumented - confirm against the definition
711 ****************************************************************************************************
712 */
713 void hme_populate_pus(
714     me_ctxt_t *ps_thrd_ctxt,
715     me_frm_ctxt_t *ps_ctxt,
716     hme_subpel_prms_t *ps_subpel_prms,
717     search_results_t *ps_search_results,
718     inter_cu_results_t *ps_cu_results,
719     inter_pu_results_t *ps_pu_results,
720     pu_result_t *ps_pu_result,
721     inter_ctb_prms_t *ps_inter_ctb_prms,
722     wgt_pred_ctxt_t *ps_wt_prms,
723     layer_ctxt_t *ps_curr_layer,
724     U08 *pu1_pred_dir_searched,
725     WORD32 i4_num_active_ref);
726 
727 void hme_populate_pus_8x8_cu( /* NOTE(review): undocumented; by name the 8x8 CU variant of hme_populate_pus - confirm */
728     me_ctxt_t *ps_thrd_ctxt,
729     me_frm_ctxt_t *ps_ctxt,
730     hme_subpel_prms_t *ps_subpel_prms,
731     search_results_t *ps_search_results,
732     inter_cu_results_t *ps_cu_results,
733     inter_pu_results_t *ps_pu_results,
734     pu_result_t *ps_pu_result,
735     inter_ctb_prms_t *ps_inter_ctb_prms,
736     U08 *pu1_pred_dir_searched,
737     WORD32 i4_num_active_ref,
738     U08 u1_blk_8x8_mask); /* presumably a bit mask selecting 8x8 blks - TODO confirm */
739 
740 S32 hme_recompute_lambda_from_min_8x8_act_in_ctb( /* NOTE(review): undocumented; by name, recomputes lambda from the min 8x8 activity in the ctb - confirm */
741     me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb);
742 
743 /**
744 ********************************************************************************
745 *  @fn     hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
746 *
747 *  @brief  Update the Dynamic search params based on the current MVs
748 *
749 *  @param[in,out]  ps_dyn_range_prms : dynamic range params structure
750 *  @param[in]      i2_mvy            : y component of the current MV
751 *
752 *  @return None
753 ********************************************************************************
754 */
755 void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy);
756 
757 S32 hme_create_child_nodes_cu_tree( /* NOTE(review): undocumented; by name, creates child nodes in the cu tree; meaning of the S32 return not visible here - confirm */
758     cur_ctb_cu_tree_t *ps_cu_tree_root,
759     cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
760     S32 nodes_already_created);
761 
762 void hme_add_new_node_to_a_sorted_array( /* NOTE(review): undocumented; by name, inserts ps_result_node into pps_sorted_array; sort key not visible here - confirm */
763     search_node_t *ps_result_node,
764     search_node_t **pps_sorted_array,
765     U08 *pu1_shifts,
766     U32 u4_num_results_updated,
767     U08 u1_shift);
768 
769 S32 hme_find_pos_of_implicitly_stored_ref_id( /* NOTE(review): undocumented; presumably returns the position of i1_ref_idx within pi1_ref_idx - confirm */
770     S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results);
771 
772 S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt); /* NOTE(review): undocumented; by name, populates the fpel search candidates; meaning of the S32 return not visible here - confirm */
773 
774 void hme_init_pred_buf_info( /* NOTE(review): undocumented; by name, initialises the pred buf info entries - confirm */
775     hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS], /* pointer to array(s) of MAX_NUM_INTER_PARTS entries */
776     hme_pred_buf_mngr_t *ps_buf_mngr,
777     U08 u1_pu1_wd,
778     U08 u1_pu1_ht,
779     PART_TYPE_T e_part_type);
780 
781 void hme_debrief_bipred_eval( /* NOTE(review): undocumented; presumably post-processes the result of bipred evaluation - confirm */
782     part_type_results_t *ps_part_type_result,
783     hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS], /* pointer to array(s) of MAX_NUM_INTER_PARTS entries */
784     hme_pred_buf_mngr_t *ps_pred_buf_mngr,
785     U08 *pu1_allocated_pred_buf_array_indixes,
786     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
787 
788 U08 hme_decide_search_candidate_priority_in_l1_and_l2_me( /* NOTE(review): undocumented; presumably returns the priority of the candidate type for the given quality preset - confirm */
789     SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset);
790 
791 U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index); /* NOTE(review): undocumented; presumably returns the priority of the candidate type in L0 ME - confirm */
792 
793 void hme_search_cand_data_init( /* NOTE(review): undocumented; by name, initialises search candidate data; pointer params are presumably outputs - confirm */
794     S32 *pi4_id_Z,
795     S32 *pi4_id_coloc,
796     S32 *pi4_num_coloc_cands,
797     U08 *pu1_search_candidate_list_index,
798     S32 i4_num_act_ref_l0,
799     S32 i4_num_act_ref_l1,
800     U08 u1_is_bidir_enabled,
801     U08 u1_4x4_blk_in_l1me);
802 
803 void hme_compute_variance_for_all_parts( /* NOTE(review): undocumented; presumably writes one variance per valid partition into pu4_variance - confirm */
804     U08 *pu1_data,
805     S32 i4_data_stride,
806     S32 *pi4_valid_part_array,
807     U32 *pu4_variance,
808     S32 i4_num_valid_parts,
809     U08 u1_cu_size);
810 
811 void hme_compute_sigmaX_and_sigmaXSquared( /* NOTE(review): undocumented; by name, computes sigmaX and sigmaXSquared over pu1_data - confirm */
812     U08 *pu1_data,
813     S32 i4_buf_stride,
814     void *pv_sigmaX,
815     void *pv_sigmaXSquared,
816     U08 u1_base_blk_wd,
817     U08 u1_base_blk_ht,
818     U08 u1_blk_wd,
819     U08 u1_blk_ht,
820     U08 u1_is_sigma_pointer_size_32_bit, /* presumably selects 32-bit elements behind the two void pointers - TODO confirm */
821     U08 u1_array_stride);
822 
823 void hme_compute_final_sigma_of_pu_from_base_blocks( /* NOTE(review): undocumented; presumably combines base-block sigmas into the final sigmas for the PU given by i4_part_id - confirm */
824     U32 *pu4_SigmaX,
825     U32 *pu4_SigmaXSquared,
826     ULWORD64 *pu8_final_sigmaX,
827     ULWORD64 *pu8_final_sigmaX_Squared,
828     U08 u1_cu_size,
829     U08 u1_base_block_size,
830     S32 i4_part_id,
831     U08 u1_base_blk_array_stride);
832 
/**
********************************************************************************
*  @fn     hme_compute_stim_injected_distortion_for_all_parts
*
*  @brief  Declaration only. Per the name, presumably computes a distortion
*          with a "stim" term injected for each listed partition - TODO confirm
*          against the definition; the term is not defined in this header.
*
*  @param  pu1_pred                 : U08 pointer (prediction data, per name)
*  @param  i4_pred_stride           : S32 stride that accompanies pu1_pred
*  @param  pi4_valid_part_array     : S32 pointer (partition ids, per the name)
*  @param  pu8_src_sigmaX           : ULWORD64 pointer
*  @param  pu8_src_sigmaXSquared    : ULWORD64 pointer
*  @param  pi4_sad_array            : S32 pointer; non-const
*  @param  i4_alpha_stim_multiplier : S32 multiplier
*  @param  i4_inv_wt                : S32 value
*  @param  i4_inv_wt_shift_val      : S32 shift value
*  @param  i4_num_valid_parts       : S32 count
*  @param  i4_wpred_log_wdc         : S32 value
*  @param  u1_cu_size               : U08 CU size
*
*  @return None
********************************************************************************
*/
833 void hme_compute_stim_injected_distortion_for_all_parts(
834     U08 *pu1_pred,
835     S32 i4_pred_stride,
836     S32 *pi4_valid_part_array,
837     ULWORD64 *pu8_src_sigmaX,
838     ULWORD64 *pu8_src_sigmaXSquared,
839     S32 *pi4_sad_array,
840     S32 i4_alpha_stim_multiplier,
841     S32 i4_inv_wt,
842     S32 i4_inv_wt_shift_val,
843     S32 i4_num_valid_parts,
844     S32 i4_wpred_log_wdc,
845     U08 u1_cu_size);
846 
/**
********************************************************************************
*  @fn     sigma_for_cusize_16_and_baseblock_size_16
*
*  @brief  Declaration only. Per the name, presumably the sigmaX / sigmaXSquared
*          computation for CU size 16 with base block size 16 - TODO confirm.
*
*  @param  pu1_data          : U08 data pointer
*  @param  i4_data_stride    : S32 stride that accompanies pu1_data
*  @param  pu4_sigmaX        : U32 pointer (non-const)
*  @param  pu4_sigmaXSquared : U32 pointer (non-const)
*
*  @return None
********************************************************************************
*/
847 void sigma_for_cusize_16_and_baseblock_size_16(
848     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
849 
/**
********************************************************************************
*  @fn     sigma_for_cusize_16_and_baseblock_size_8
*
*  @brief  Declaration only. Per the name, presumably the sigmaX / sigmaXSquared
*          computation for CU size 16 with base block size 8 - TODO confirm.
*          This is the only sigma_for_cusize_* prototype in this header that
*          takes a fifth argument.
*
*  @param  pu1_data          : U08 data pointer
*  @param  i4_data_stride    : S32 stride that accompanies pu1_data
*  @param  pu4_sigmaX        : U32 pointer (non-const)
*  @param  pu4_sigmaXSquared : U32 pointer (non-const)
*  @param  diff_cu_size      : U08; meaning is not visible here - TODO confirm
*
*  @return None
********************************************************************************
*/
850 void sigma_for_cusize_16_and_baseblock_size_8(
851     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared, U08 diff_cu_size);
852 
/**
********************************************************************************
*  @fn     sigma_for_cusize_16_and_baseblock_size_4
*
*  @brief  Declaration only. Per the name, presumably the sigmaX / sigmaXSquared
*          computation for CU size 16 with base block size 4 - TODO confirm.
*
*  @param  pu1_data          : U08 data pointer
*  @param  i4_data_stride    : S32 stride that accompanies pu1_data
*  @param  pu4_sigmaX        : U32 pointer (non-const)
*  @param  pu4_sigmaXSquared : U32 pointer (non-const)
*
*  @return None
********************************************************************************
*/
853 void sigma_for_cusize_16_and_baseblock_size_4(
854     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
855 
/**
********************************************************************************
*  @fn     sigma_for_cusize_32_and_baseblock_size_32
*
*  @brief  Declaration only. Per the name, presumably the sigmaX / sigmaXSquared
*          computation for CU size 32 with base block size 32 - TODO confirm.
*
*  @param  pu1_data          : U08 data pointer
*  @param  i4_data_stride    : S32 stride that accompanies pu1_data
*  @param  pu4_sigmaX        : U32 pointer (non-const)
*  @param  pu4_sigmaXSquared : U32 pointer (non-const)
*
*  @return None
********************************************************************************
*/
856 void sigma_for_cusize_32_and_baseblock_size_32(
857     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
858 
/**
********************************************************************************
*  @fn     sigma_for_cusize_64_and_baseblock_size_64
*
*  @brief  Declaration only. Per the name, presumably the sigmaX / sigmaXSquared
*          computation for CU size 64 with base block size 64 - TODO confirm.
*
*  @param  pu1_data          : U08 data pointer
*  @param  i4_data_stride    : S32 stride that accompanies pu1_data
*  @param  pu4_sigmaX        : U32 pointer (non-const)
*  @param  pu4_sigmaXSquared : U32 pointer (non-const)
*
*  @return None
********************************************************************************
*/
859 void sigma_for_cusize_64_and_baseblock_size_64(
860     U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
861 
/**
********************************************************************************
*  @fn     hme_choose_best_noise_preserver_amongst_fpel_and_subpel_winners
*
*  @brief  Declaration only. Per the name, presumably compares full-pel and
*          sub-pel winners and keeps the one that best preserves noise - TODO
*          confirm against the definition.
*
*  @param  ps_fullpel_winner_data   : pointer to fullpel_refine_ctxt_t
*  @param  pps_part_results         : pointer to pointer(s) to search_node_t
*  @param  ps_curr_layer            : pointer to layer_ctxt_t
*  @param  ps_wt_inp_prms           : pointer to wgt_pred_ctxt_t
*  @param  pu4_src_variance         : U32 pointer
*  @param  i4_cu_x_off_in_ctb       : S32 x offset (CU within CTB, per name)
*  @param  i4_cu_y_off_in_ctb       : S32 y offset (CU within CTB, per name)
*  @param  i4_ctb_x_off             : S32 x offset (CTB, per name)
*  @param  i4_ctb_y_off             : S32 y offset (CTB, per name)
*  @param  i4_inp_stride            : S32 stride
*  @param  i4_alpha_stim_multiplier : S32 multiplier
*  @param  u1_subpel_uses_satd      : U08, flag-like by name
*
*  @return None
********************************************************************************
*/
862 void hme_choose_best_noise_preserver_amongst_fpel_and_subpel_winners(
863     fullpel_refine_ctxt_t *ps_fullpel_winner_data,
864     search_node_t **pps_part_results,
865     layer_ctxt_t *ps_curr_layer,
866     wgt_pred_ctxt_t *ps_wt_inp_prms,
867     U32 *pu4_src_variance,
868     S32 i4_cu_x_off_in_ctb,
869     S32 i4_cu_y_off_in_ctb,
870     S32 i4_ctb_x_off,
871     S32 i4_ctb_y_off,
872     S32 i4_inp_stride,
873     S32 i4_alpha_stim_multiplier,
874     U08 u1_subpel_uses_satd);
875 
876 #if TEMPORAL_NOISE_DETECT
/**
********************************************************************************
*  @fn     ihevce_16x16block_temporal_noise_detect
*
*  @brief  Declaration only; visible to the compiler only when
*          TEMPORAL_NOISE_DETECT evaluates to non-zero. Per the name,
*          presumably performs temporal noise detection on a 16x16 block -
*          TODO confirm against the definition.
*
*  @param  had_block_size, ctb_width, ctb_height : WORD32 sizes
*  @param  ps_ctb_noise_params        : pointer to ihevce_ctb_noise_params
*  @param  s_proj_srch_cand_init_data : pointer to fpel_srch_cand_init_data_t
*                                       (a pointer despite the "s_" prefix)
*  @param  s_search_prms_blk          : pointer to hme_search_prms_t
*                                       (a pointer despite the "s_" prefix)
*  @param  ps_ctxt                    : pointer to me_frm_ctxt_t
*  @param  num_pred_dir               : WORD32 count
*  @param  i4_num_act_ref_l0, i4_num_act_ref_l1 : WORD32 counts
*  @param  i4_cu_x_off, i4_cu_y_off   : WORD32 offsets
*  @param  ps_wt_inp_prms             : pointer to wgt_pred_ctxt_t
*  @param  input_stride               : WORD32 stride
*  @param  index_8x8_block, num_horz_blocks, num_8x8_in_ctb_row,
*          i4_index_variance          : WORD32 values; roles not visible here
*
*  @return WORD32; the meaning of the value is not documented in this header
********************************************************************************
*/
877 WORD32 ihevce_16x16block_temporal_noise_detect(
878     WORD32 had_block_size,
879     WORD32 ctb_width,
880     WORD32 ctb_height,
881     ihevce_ctb_noise_params *ps_ctb_noise_params,
882     fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
883     hme_search_prms_t *s_search_prms_blk,
884     me_frm_ctxt_t *ps_ctxt,
885     WORD32 num_pred_dir,
886     WORD32 i4_num_act_ref_l0,
887     WORD32 i4_num_act_ref_l1,
888     WORD32 i4_cu_x_off,
889     WORD32 i4_cu_y_off,
890     wgt_pred_ctxt_t *ps_wt_inp_prms,
891     WORD32 input_stride,
892     WORD32 index_8x8_block,
893     WORD32 num_horz_blocks,
894     WORD32 num_8x8_in_ctb_row,
895     WORD32 i4_index_variance);
896 #endif
897 
898 /**
899 ********************************************************************************
900 *  @fn     hme_decide_part_types
901 *
902 *  @brief  Does uni/bi evaluation across various partition types,
903 *          decides best inter partition types for the CU, compares
904 *          intra cost and decides the best K results for the CU
905 *
906 *          This is called post subpel refinement for 16x16s, 8x8s and
907 *          for post merge evaluation for 32x32,64x64 CUs
908 *
909 *  @param[in,out] ps_search_results : Search results data structure
910 *                 - In : 2 lists of up to 2 mvs & refids, active partition mask
911 *                 - Out: Best results for final rdo evaluation of the cu
912 *
913 *  @param[in]     ps_subpel_prms : Sub pel params data structure
914 *  NOTE(review): neither name above is a parameter of the prototype below - TODO update
915 *
916 *  @par Description
917 *    --------------------------------------------------------------------------------
918 *     Flow:
919 *            for each category (SMP,AMP,2Nx2N based on part mask)
920 *            {
921 *                for each part_type
922 *                {
923 *                    for each part
924 *                        pick best candidate from each list
925 *                    combine uni part type
926 *                    update best results for part type
927 *                }
928 *                pick the best part type for given category (for SMP & AMP)
929 *            }
930 *                    ||
931 *                    ||
932 *                    \/
933 *            for up to 3 best part types
934 *            {
935 *                for each part
936 *                {
937 *                    compute fixed size had for all uni and remember coeffs
938 *                    compute bisatd
939 *                    uni vs bi and gives up to two results
940 *                    also gives the pt level pred buffer
941 *                }
942 *             }
943 *                    ||
944 *                    ||
945 *                    \/
946 *            select X candidates for tu recursion as per the Note below
947 *               tu_rec_on_part_type (reuse transform coeffs)
948 *                    ||
949 *                    ||
950 *                    \/
951 *            insert intra nodes at appropriate result id
952 *                    ||
953 *                    ||
954 *                    \/
955 *            populate y best results for rdo based on preset
956 *
957 *     Note :
958 *     number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
959 *     number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
960 *     --------------------------------------------------------------------------------
961 *
962 *  @return None
963 ********************************************************************************
964 */
965 void hme_decide_part_types(
966     inter_cu_results_t *ps_cu_results,
967     inter_pu_results_t *ps_pu_results,
968     inter_ctb_prms_t *ps_inter_ctb_prms,
969     me_frm_ctxt_t *ps_ctxt,
970     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
971     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
972 
/**
********************************************************************************
*  @fn     hme_compute_pred_and_evaluate_bi
*
*  @brief  Declaration only. Per the name, presumably computes prediction data
*          and evaluates bi-prediction for a partition type - TODO confirm
*          against the definition.
*
*  @param  ps_cu_results       : pointer to inter_cu_results_t
*  @param  ps_pu_results       : pointer to inter_pu_results_t
*  @param  ps_inter_ctb_prms   : pointer to inter_ctb_prms_t
*  @param  ps_part_type_result : pointer to part_type_results_t
*  @param  pu8_winning_pred_sigmaXSquare : ULWORD64 pointer (non-const)
*  @param  pu8_winning_pred_sigmaX       : ULWORD64 pointer (non-const)
*  @param  ps_cmn_utils_optimised_function_list : pointer to
*                                ihevce_cmn_opt_func_t
*  @param  ps_me_optimised_function_list : pointer to
*                                ihevce_me_optimised_function_list_t
*
*  @return None
********************************************************************************
*/
973 void hme_compute_pred_and_evaluate_bi(
974     inter_cu_results_t *ps_cu_results,
975     inter_pu_results_t *ps_pu_results,
976     inter_ctb_prms_t *ps_inter_ctb_prms,
977     part_type_results_t *ps_part_type_result,
978     ULWORD64 *pu8_winning_pred_sigmaXSquare,
979     ULWORD64 *pu8_winning_pred_sigmaX,
980     ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
981     ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
982 
983 /**
984 ********************************************************************************
985 *  @fn     hme_insert_intra_nodes_post_bipred
986 *
987 *  @brief  Compares intra costs (populated by IPE) with the best inter costs
988 *          (populated after evaluating bi-pred) and updates the best results
989 *          if intra cost is better
990 *
991 *  @param[in,out]  ps_cu_results    [inout] : Best results structure of CU
992 *                  ps_cur_ipe_ctb   [in]    : intra results for the current CTB
993 *                  i4_frm_qstep     [in]    : current frame quantizer (qscale)
994 *
995 *  @return None
996 ********************************************************************************
997 */
998 void hme_insert_intra_nodes_post_bipred(
999     inter_cu_results_t *ps_cu_results,
1000     ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
1001     WORD32 i4_frm_qstep);
1002 
/**
********************************************************************************
*  @fn     hme_set_mv_limit_using_dvsr_data
*
*  @brief  Declaration only. Per the name, presumably derives a motion vector
*          limit from "dvsr" data - TODO confirm against the definition; the
*          abbreviation is not expanded in this header.
*
*  @param  ps_ctxt       : pointer to me_frm_ctxt_t
*  @param  ps_curr_layer : pointer to layer_ctxt_t
*  @param  ps_mv_limit   : pointer to range_prms_t; non-const, presumably the
*                          output
*  @param  pi2_prev_enc_frm_max_mv_y : S16 pointer
*  @param  u1_num_act_ref_pics       : U08 count
*
*  @return None
********************************************************************************
*/
1003 void hme_set_mv_limit_using_dvsr_data(
1004     me_frm_ctxt_t *ps_ctxt,
1005     layer_ctxt_t *ps_curr_layer,
1006     range_prms_t *ps_mv_limit,
1007     S16 *pi2_prev_enc_frm_max_mv_y,
1008     U08 u1_num_act_ref_pics);
1009 
/**
********************************************************************************
*  @fn     hme_part_mask_populator
*
*  @brief  Declaration only. Per the name, presumably builds a partition mask
*          for a block of input data - TODO confirm against the definition.
*
*  @param  pu1_inp                    : U08 input pointer
*  @param  i4_inp_stride              : S32 stride that accompanies pu1_inp
*  @param  u1_limit_active_partitions : U08, flag-like by name
*  @param  u1_is_bPic                 : U08, flag-like by name
*  @param  u1_is_refPic               : U08, flag-like by name
*  @param  u1_blk_8x8_mask            : U08 mask
*  @param  e_me_quality_preset        : ME quality preset
*                                       (ME_QUALITY_PRESETS_T)
*
*  @return S32; presumably the partition mask - TODO confirm
********************************************************************************
*/
1010 S32 hme_part_mask_populator(
1011     U08 *pu1_inp,
1012     S32 i4_inp_stride,
1013     U08 u1_limit_active_partitions,
1014     U08 u1_is_bPic,
1015     U08 u1_is_refPic,
1016     U08 u1_blk_8x8_mask,
1017     ME_QUALITY_PRESETS_T e_me_quality_preset);
1018 
1019 #endif /* #ifndef _HME_UTILS_H_ */
1020