1 /******************************************************************************
2  *
3  * Copyright (C) 2018 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 /*!
21 ******************************************************************************
22 * \file ihevce_ipe_structs.h
23 *
24 * \brief
25 *    This file contains the structures of the IPE pass
26 *
27 * \date
28 *    18/09/2012
29 *
30 * \author
31 *    Ittiam
32 *
33 ******************************************************************************
34 */
35 
36 #ifndef _IHEVCE_IPE_STRUCTS_H_
37 #define _IHEVCE_IPE_STRUCTS_H_
38 
39 /*****************************************************************************/
40 /* Constant Macros                                                           */
41 /*****************************************************************************/
/* NOTE(review): meanings below are inferred from the macro names only - confirm with users */
#define MAX_FAST_IP_MODES     23
#define NUM_INTRA_RDOPT_MODES 1

/*
 * MAX_TREE_NODES: compile-time node count selected from the ratio between
 * MAX_CTB_SIZE and MIN_TU_SIZE (both must be defined before this macro is
 * expanded). The ratio is tested as 1x, 2x, 4x and 8x; any other ratio
 * falls through to the last value of the chain.
 */
#if 1  // FAST_PART_WITH_OPTION_4
#define MAX_TREE_NODES                                                                             \
    ((MAX_CTB_SIZE == MIN_TU_SIZE)          ? 1  :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 1))   ? 5  :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 2))   ? 21 :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 3))   ? 37 : 53)
#else  // FAST_PART_WITH_OPTION_4
#define MAX_TREE_NODES                                                                             \
    ((MAX_CTB_SIZE == MIN_TU_SIZE)          ? 1  :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 1))   ? 5  :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 2))   ? 9  :                                                 \
     (MAX_CTB_SIZE == (MIN_TU_SIZE << 3))   ? 13 : 17)
#endif  // FAST_PART_WITH_OPTION_4
/*
 * Neighbour bit masks, listed by ascending bit position:
 * bits 0-3, 4-7, 8-11, 12-15 and the single bit 16.
 * NOTE(review): presumably applied to neighbour-availability words such as
 * i4_nbr_flag - confirm against the users of these masks.
 */
#define BOTTOM_LEFT_FLAG 0x0000000F
#define LEFT_FLAG        0x000000F0
#define TOP_FLAG         0x00000F00
#define TOP_RIGHT_FLAG   0x0000F000
#define TOP_LEFT_FLAG    0x00010000

/* Numeric limits */
#define MAX_UWORD8         0xFF
#define MAX_DOUBLE         1.7e+308  ///< max. value of double-type value
#define MAX_INTRA_COST_IPE 0x0F7F7F7F

/* CTB dimensions expressed in units of 4 luma samples (MAX_CTB_SIZE / 4) */
#define MAX_TU_ROW_IN_CTB (MAX_CTB_SIZE >> 2)
#define MAX_TU_COL_IN_CTB (MAX_CTB_SIZE >> 2)

#define BIT_DEPTH 8

#define FAST_PARTITION_WITH_TRANSFORM 1

/*
 * Both selectors expand to the token C, which must be defined elsewhere.
 * NOTE(review): presumably chooses the C (as opposed to an optimised)
 * implementation - confirm where these are compared.
 */
#define IHEVCE_INTRA_REF_FILTERING         C
#define IHEVCE_INTRA_LUMA_REF_SUBSTITUTION C
81 /*****************************************************************************/
82 /* Constant Macros                                                           */
83 /*****************************************************************************/
/** @brief Number of transform types: 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
#define NUM_TRANS_TYPES 5

/* Mode numbers of the two non-angular intra modes */
#define INTRA_PLANAR 0
#define INTRA_DC     1

/*****************************************************************************/
/* Function Macros                                                           */
/*****************************************************************************/
/** @brief Identity mapping: yields its argument unchanged (parenthesised) */
#define INTRA_ANGULAR(x) (x)
93 
/** @brief Maximum value representable in 30 bits */
#define MAX30 ((1 << 30) - 1)

/**
 * @brief Clip a value to at most MAX30 (the value is assumed to be unsigned).
 * @note  The argument is expanded twice; do not pass an expression with
 *        side effects.
 */
#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))

/**
 * @brief Compute (lambda * rate) >> qshift and clip the result to 30 bits.
 *
 * @param r      rate; must be an integer expression, it is widened to
 *               ULWORD64 before the multiplication
 * @param l      lambda
 * @param qshift right shift applied to the product
 *
 * The rate is widened BEFORE multiplying so that two 32-bit operands are
 * multiplied in 64-bit arithmetic. Casting only the finished product would
 * let it overflow in 32 bits (undefined behaviour for signed operands) and
 * yield a wrong cost. The outer cast is retained so that a non-integer
 * lambda still produces an integer before the shift.
 *
 * @note r, l and qshift are each expanded twice (via CLIP30).
 */
#define COMPUTE_RATE_COST_CLIP30(r, l, qshift)                                                     \
    ((WORD32)CLIP30((ULWORD64)((ULWORD64)(r) * (l)) >> (qshift)))
102 
103 /*****************************************************************************/
104 /* Typedefs                                                                  */
105 /*****************************************************************************/
106 typedef UWORD32 (*pf_res_trans_luma_had)(
107     UWORD8 *pu1_origin,
108     WORD32 src_strd,
109     UWORD8 *pu1_pred_buf,
110     WORD32 pred_strd,
111     WORD16 *pi2_dst,
112     WORD32 dst_strd,
113     WORD32 size);
114 
115 typedef void (*pf_ipe_intra_pred)(
116     UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
117 
118 typedef UWORD32 (*pf_ipe_res_trans)(
119     UWORD8 *pu1_src,
120     UWORD8 *pu1_pred,
121     WORD16 *pi2_tmp,
122     WORD16 *pi2_dst,
123     WORD32 src_strd,
124     WORD32 pred_strd,
125     WORD32 dst_strd,
126     WORD32 chroma_flag);
127 
128 typedef FT_CALC_HAD_SATD_8BIT *pf_ipe_res_trans_had;
129 /*****************************************************************************/
130 /* Enums                                                                     */
131 /*****************************************************************************/
132 
/** @brief Indices of the IPE memory records; the last enumerator is the record count */
typedef enum
{
    IPE_CTXT = 0,
    IPE_THRDS_CTXT = 1,

    /* must remain the final entry: number of records above */
    NUM_IPE_MEM_RECS = 2
} IPE_MEM_TABS_T;
143 
/**
 * @brief Indices into a table of intra prediction functions.
 *
 * Each enumerator name carries the mode number(s) it stands for;
 * NUM_IPE_FUNCS is the number of entries in such a table.
 */
typedef enum
{
    IPE_FUNC_MODE_0 = 0,
    IPE_FUNC_MODE_1 = 1,
    IPE_FUNC_MODE_2 = 2,
    IPE_FUNC_MODE_3TO9 = 3,
    IPE_FUNC_MODE_10 = 4,
    IPE_FUNC_MODE_11TO17 = 5,
    IPE_FUNC_MODE_18_34 = 6,
    IPE_FUNC_MODE_19TO25 = 7,
    IPE_FUNC_MODE_26 = 8,
    IPE_FUNC_MODE_27TO33 = 9,

    /* must remain the final entry: number of function slots above */
    NUM_IPE_FUNCS = 10
} IPE_FUNCS_T;
160 
161 /*****************************************************************************/
162 /* Structure                                                                 */
163 /*****************************************************************************/
164 /**
165 ******************************************************************************
166  *  @brief    IPE CTB to CU and TU Quadtree Recursive Structure
167 ******************************************************************************
168  */
169 
170 typedef struct ihevce_ipe_cu_tree_t ihevce_ipe_cu_tree_t;
171 
172 typedef struct ihevce_ipe_cu_tree_t
173 {
174     /**
175      * Origin of current coding unit relative to top-left of CTB
176      */
177     UWORD16 u2_x0;
178 
179     UWORD16 u2_y0;
180 
181     /**
182      * Origin of current coding unit relative to top-left of Picture
183      */
184     UWORD16 u2_orig_x;
185 
186     UWORD16 u2_orig_y;
187 
188     /**
189      * Size of current coding unit in luma pixels
190      */
191     UWORD8 u1_cu_size;
192 
193     UWORD8 u1_width;
194 
195     UWORD8 u1_height;
196 
197     UWORD8 u1_depth;
198 
199     UWORD8 u1_part_flag_pos;
200 
201     UWORD8 u1_log2_nt;
202 
203     WORD32 i4_nbr_flag;
204 
205     /**
206      * Recursive Bracketing Parameters
207      */
208     UWORD8 best_mode;
209 
210     WORD32 best_satd;
211 
212     WORD32 best_cost;
213 
214     /**
215      * Number of pixels available in these neighbors
216      */
217     UWORD8 u1_num_left_avail;
218 
219     UWORD8 u1_num_top_avail;
220 
221     UWORD8 u1_num_top_right_avail;
222 
223     UWORD8 u1_num_bottom_left_avail;
224 
225     UWORD8 au1_best_mode_1tu[NUM_BEST_MODES];
226 
227     WORD32 au4_best_cost_1tu[NUM_BEST_MODES];
228 
229     UWORD8 au1_best_mode_4tu[NUM_BEST_MODES];
230 
231     WORD32 au4_best_cost_4tu[NUM_BEST_MODES];
232 
233     ihevce_ipe_cu_tree_t *ps_parent;
234 
235     ihevce_ipe_cu_tree_t *ps_sub_cu[4];
236 
237     /* best mode bits cost */
238     UWORD16 u2_mode_bits_cost;
239 
240 } ihevce_ipe_cu_tree_node_t;
241 
242 /**
243 ******************************************************************************
244  *  @brief    IPE module context memory
245 ******************************************************************************
246  */
247 typedef struct
248 {
249     ihevce_ipe_cu_tree_t *ps_ipe_cu_tree;
250 
251     /* one parent and four children */
252     ihevce_ipe_cu_tree_t as_ipe_cu_tree[5];
253 
254     UWORD8 au1_ctb_mode_map[MAX_TU_ROW_IN_CTB + 1][MAX_TU_COL_IN_CTB + 1];
255 
256     UWORD8 au1_cand_mode_list[3];
257 
258     /** Pointer to structure containing function pointers of common*/
259     func_selector_t *ps_func_selector;
260 
261     /**
262      * CU level Qp / 6
263      */
264     WORD32 i4_cu_qp_div6;
265 
266     /**
267      * CU level Qp % 6
268      */
269     WORD32 i4_cu_qp_mod6;
270 
271     /** array of luma intra prediction function pointers */
272     pf_ipe_intra_pred apf_ipe_lum_ip[NUM_IPE_FUNCS];
273 
274     /** array of function pointers for residual and
275      *  forward transform for all transform sizes
276      */
277     pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
278 
279     /** array of function pointers for residual and
280      *  forward transform for all transform sizes
281      */
282     pf_res_trans_luma_had apf_resd_trns_had[NUM_TRANS_TYPES];
283 
284     /** array of pointer to store the scaling matrices for
285      *  all transform sizes and qp % 6 (pre computed)
286      */
287     WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
288 
289     /** array of pointer to store the re-scaling matrices for
290      *  all transform sizes and qp % 6 (pre computed)
291      */
292     WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
293 
294     /** Qunatization rounding factor for inter and intra CUs */
295     WORD32 i4_quant_rnd_factor[2];
296 
297     UWORD8 u1_ctb_size;
298 
299     UWORD8 u1_min_cu_size;
300 
301     UWORD8 u1_min_tu_size;
302 
303     UWORD16 u2_ctb_row_num;
304 
305     UWORD16 u2_ctb_num_in_row;
306 
307     WORD8 i1_QP;
308 
309     UWORD8 u1_num_b_frames;
310 
311     UWORD8 b_sad_type;
312 
313     UWORD8 u1_ipe_step_size;
314 
315     WORD32 i4_ol_satd_lambda;
316 
317     WORD32 i4_ol_sad_lambda;
318 
319     UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
320 
321     /**
322      * Pointer to (1,1) location in au1_nbr_ctb_map
323      */
324     UWORD8 *pu1_ctb_nbr_map;
325 
326     /**
327      * neigbour map buffer stride;
328      */
329     WORD32 i4_nbr_map_strd;
330 
331     /** CTB neighbour availability flags */
332     nbr_avail_flags_t s_ctb_nbr_avail_flags;
333 
334     /** Slice Type of the current picture being processed */
335     WORD32 i4_slice_type;
336 
337     /** Temporal ID of the current picture being processed */
338     WORD32 i4_temporal_lyr_id;
339 
340     WORD32 i4_ol_sad_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
341     WORD32 i4_ol_satd_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
342 
343     /************************************************************************/
344     /* The fields with the string 'type2' in their names are required */
345     /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
346     /* to the bit_depth != internal_bit_depth are stored in these fields */
347     /************************************************************************/
348     WORD32 i4_ol_sad_type2_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
349     WORD32 i4_ol_satd_type2_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
350 
351     /*Store the HEVC frame level qp for level modulation*/
352     WORD32 i4_hevc_qp;
353     /*Store the frame level qscale for level modulation*/
354     WORD32 i4_qscale;
355 #if POW_OPT
356     /* Averge activity of 8x8 blocks from previous frame
357     *  If L1, maps to 16*16 in L0
358     */
359     long double ld_curr_frame_8x8_log_avg[2];
360 
361     /* Averge activity of 16x16 blocks from previous frame
362     *  If L1, maps to 32*32 in L0
363     */
364     long double ld_curr_frame_16x16_log_avg[3];
365 
366     /* Averge activity of 32x32 blocks from previous frame
367     *  If L1, maps to 64*64 in L0
368     */
369     long double ld_curr_frame_32x32_log_avg[3];
370 #else
371     /* Averge activity of 8x8 blocks from previous frame
372     *  If L1, maps to 16*16 in L0
373     */
374     LWORD64 i8_curr_frame_8x8_avg_act[2];
375 
376     /* Averge activity of 16x16 blocks from previous frame
377     *  If L1, maps to 32*32 in L0
378     */
379     LWORD64 i8_curr_frame_16x16_avg_act[3];
380 
381     /* Averge activity of 32x32 blocks from previous frame
382     *  If L1, maps to 64*64 in L0
383     */
384     LWORD64 i8_curr_frame_32x32_avg_act[3];
385 #endif
386     /** Frame-levelSATD cost accumalator */
387     LWORD64 i8_frame_acc_satd_cost;
388 
389     /** Frame-levelSATD accumalator */
390     LWORD64 i8_frame_acc_satd;
391 
392     /** Frame-level activity factor for CU 8x8 accumalator */
393     LWORD64 i8_frame_acc_act_factor;
394 
395     /** Frame-level Mode Bits cost accumalator */
396     LWORD64 i8_frame_acc_mode_bits_cost;
397 
398     /** Encoder quality preset : See IHEVCE_QUALITY_CONFIG_T for presets */
399     WORD32 i4_quality_preset;
400 
401     /** Frame-level SATD/qp accumulator in q10 format*/
402     LWORD64 i8_frame_acc_satd_by_modqp_q10;
403 
404     /** For testing EIID only. */
405     UWORD32 u4_num_16x16_skips_at_L0_IPE;
406 
407     /** Reference sample array. Used as local variable in mode_eval_filtering  */
408     UWORD8 au1_ref_samples[1028];
409     /** filtered reference sample array. Used as local variable in mode_eval_filtering */
410     UWORD8 au1_filt_ref_samples[1028];
411     /** array for the modes to be evaluated. Used as local variable in mode_eval_filtering */
412     UWORD8 au1_modes_to_eval[MAX_NUM_IP_MODES];
413     /** temp array for the modes to be evaluated. Used as local variable in mode_eval_filtering */
414     UWORD8 au1_modes_to_eval_temp[MAX_NUM_IP_MODES];
415     /** pred samples array. Used as local variable in mode_eval_filtering */
416     MEM_ALIGN32 UWORD8 au1_pred_samples[4096];
417     /** array for storing satd cost. Used as local variable in mode_eval_filtering*/
418     UWORD16 au2_mode_bits_satd_cost[MAX_NUM_IP_MODES];
419     /** array for storing satd values. used as local variable in mode_eval_filtering */
420     UWORD16 au2_mode_bits_satd[MAX_NUM_IP_MODES];
421 
422     /** reference data, local for pu_calc_8x8 */
423     UWORD8 au1_ref_8x8pu[4][18];
424     /** mode_bits_cost, local for pu_calc_8x8 */
425     UWORD16 au2_mode_bits_cost_8x8pu[4][MAX_NUM_IP_MODES];
426     /** mode_bits, local for pu_calc_8x8 */
427     UWORD16 au2_mode_bits_8x8_pu[MAX_NUM_IP_MODES];
428 
429     /** tranform coeff temp, local to ihevce_pu_calc_4x4_blk */
430     WORD16 *pi2_trans_tmp;  //this memory is overlayed with au1_pred_samples[4096]. First half.
431 
432     /** tranform coeff out, local to ihevce_pu_calc_4x4_blk */
433     WORD16 *pi2_trans_out;  //this memory is overlayed with au1_pred_samples[4096]. Second half.
434 
435     UWORD8 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
436 
437     UWORD8 u1_bit_depth;
438 
439     rc_quant_t *ps_rc_quant_ctxt;
440     /** Flag that specifies whether to use SATD or SAD in L0 IPE */
441     UWORD8 u1_use_satd;
442 
443     /** Flag that specifies level of refinement */
444     UWORD8 u1_level_1_refine_on;
445 
446     /** Flag indicates that child mode decision is disabled in L0 IPE recur bracketing */
447     UWORD8 u1_disable_child_cu_decide;
448 
449     /*Modulation factor*/
450     WORD32 ai4_mod_factor_derived_by_variance[2];
451     float f_strength;
452     WORD32 i4_l0ipe_qp_mod;
453 
454     WORD32 i4_frm_qp;
455     WORD32 i4_temporal_layer;
456     WORD32 i4_pass;
457 
458     double f_i_pic_lamda_modifier;
459     WORD32 i4_use_const_lamda_modifier;
460     WORD32 i4_is_ref_pic;
461     LWORD64 i8_curr_frame_avg_mean_act;
462     WORD32 i4_enable_noise_detection;
463 
464     ihevce_ipe_optimised_function_list_t s_ipe_optimised_function_list;
465 
466     ihevce_cmn_opt_func_t s_cmn_opt_func;
467 
468 } ihevce_ipe_ctxt_t;
469 
470 /**
471 ******************************************************************************
472  *  @brief    IPE module overall context
473 ******************************************************************************
474  */
475 typedef struct
476 {
477     /*array of ipe ctxt */
478     ihevce_ipe_ctxt_t *aps_ipe_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
479 
480     /** Number of processing threads created run time */
481     WORD32 i4_num_proc_thrds;
482 
483 } ihevce_ipe_master_ctxt_t;
484 
485 /*****************************************************************************/
486 /* Extern Variable Declarations                                              */
487 /*****************************************************************************/
488 
489 /*****************************************************************************/
490 /* Extern Function Declarations                                              */
491 /*****************************************************************************/
492 void ihevce_ipe_analyse_update_cost(
493     ihevce_ipe_cu_tree_t *ps_cu_node, UWORD8 u1_mode, DOUBLE lf_cost);
494 #endif /* _IHEVCE_IPE_STRUCTS_H_ */
495