1 /******************************************************************************
2 *
3 * Copyright (C) 2018 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /*!
22 ******************************************************************************
23 * \file ihevce_recur_bracketing.c
24 *
25 * \brief
26 * This file contains interface functions of recursive bracketing
27 * module
28 * \date
29 * 12/02/2012
30 *
31 * \author
32 * Ittiam
33 *
34 * List of Functions
35 *
36 *
37 ******************************************************************************
38 */
39
40 /*****************************************************************************/
41 /* File Includes */
42 /*****************************************************************************/
43 /* System include files */
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <assert.h>
48 #include <stdarg.h>
49 #include <math.h>
50
51 /* User include files */
52 #include "ihevc_typedefs.h"
53 #include "itt_video_api.h"
54 #include "ihevce_api.h"
55
56 #include "rc_cntrl_param.h"
57 #include "rc_frame_info_collector.h"
58 #include "rc_look_ahead_params.h"
59
60 #include "ihevc_defs.h"
61 #include "ihevc_structs.h"
62 #include "ihevc_platform_macros.h"
63 #include "ihevc_deblk.h"
64 #include "ihevc_itrans_recon.h"
65 #include "ihevc_chroma_itrans_recon.h"
66 #include "ihevc_chroma_intra_pred.h"
67 #include "ihevc_intra_pred.h"
68 #include "ihevc_inter_pred.h"
69 #include "ihevc_mem_fns.h"
70 #include "ihevc_padding.h"
71 #include "ihevc_weighted_pred.h"
72 #include "ihevc_sao.h"
73 #include "ihevc_resi_trans.h"
74 #include "ihevc_quant_iquant_ssd.h"
75 #include "ihevc_cabac_tables.h"
76
77 #include "ihevce_defs.h"
78 #include "ihevce_lap_enc_structs.h"
79 #include "ihevce_multi_thrd_structs.h"
80 #include "ihevce_me_common_defs.h"
81 #include "ihevce_had_satd.h"
82 #include "ihevce_error_codes.h"
83 #include "ihevce_bitstream.h"
84 #include "ihevce_cabac.h"
85 #include "ihevce_rdoq_macros.h"
86 #include "ihevce_function_selector.h"
87 #include "ihevce_enc_structs.h"
88 #include "ihevce_entropy_structs.h"
89 #include "ihevce_cmn_utils_instr_set_router.h"
90 #include "ihevce_enc_loop_structs.h"
91 #include "ihevce_ipe_instr_set_router.h"
92 #include "ihevce_ipe_structs.h"
93 #include "ihevce_ipe_pass.h"
94 #include "ihevce_recur_bracketing.h"
95 #include "ihevce_nbr_avail.h"
96 #include "ihevc_common_tables.h"
97 #include "ihevce_decomp_pre_intra_structs.h"
98 #include "ihevce_decomp_pre_intra_pass.h"
99
100 #include "cast_types.h"
101 #include "osal.h"
102 #include "osal_defaults.h"
103
104 /*****************************************************************************/
105 /* Constant Macros */
106 /*****************************************************************************/
/* Debug switch: when non-zero, the "#if IP_DBG_L1_l2" branch inside
 * ihevce_bracketing_analysis() is compiled in place of the regular 32x32
 * merge handling.
 * NOTE(review): that branch increments ps_row_cu, which is commented out of
 * the function's parameter list, so enabling this presumably needs more work. */
#define IP_DBG_L1_l2 0

/* NOTE(review): not referenced in the part of the file reviewed here;
 * presumably a bias applied when comparing child-CU cost with parent-CU
 * cost -- confirm against the rest of the file. */
#define CHILD_BIAS 12
109
110 /*****************************************************************************/
111 /* Globals */
112 /*****************************************************************************/
113 extern pf_intra_pred g_apf_lum_ip[10];
114
115 extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
116
117 UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7,
118 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1,
119 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 };
120
121 UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0,
122 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7,
123 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 };
124
/* Clears bit number `bit` of the lvalue `x` (assigns the result back to x).
 * Both arguments are parenthesised so that expressions such as `a | b` or
 * `cond ? m : n` passed as `bit` are shifted as a whole. `x` is evaluated
 * twice, so it must not have side effects. */
#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit)))
126
127 /*****************************************************************************/
128 /* Function Definitions */
129 /*****************************************************************************/
130
131 /*!
132 ******************************************************************************
133 * \if Function name : ihevce_update_cand_list \endif
134 *
135 * \brief
136 * Final Candidate list population, nbr flag andd nbr mode update function
137 *
138 * \param[in] ps_row_cu : pointer to cu analyse struct
139 * \param[in] ps_cu_node : pointer to cu node info buffer
140 * \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer
141 * \param[in] pu1_cand_mode_list : pointer to candidate list buffer
142 *
143 * \return
144 * None
145 *
146 * \author
147 * Ittiam
148 *
149 *****************************************************************************
150 */
ihevce_update_cand_list(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ed_blk_t * ps_ed_blk_l1,ihevce_ipe_ctxt_t * ps_ctxt)151 void ihevce_update_cand_list(
152 ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt)
153 {
154 WORD32 row, col, x, y, size;
155
156 /* Candidate mode Update */
157 (void)ps_ed_blk_l1;
158 /* Update CTB mode map for the finalised CU */
159 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
160 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
161 size = ps_cu_node->u1_cu_size >> 2;
162 for(row = y; row < (y + size); row++)
163 {
164 for(col = x; col < (x + size); col++)
165 {
166 ps_ctxt->au1_ctb_mode_map[row][col] = ps_cu_node->best_mode;
167 }
168 }
169 return;
170 }
171
172 /*!
173 ******************************************************************************
174 * \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif
175 *
176 * \brief
177 * Mpm indx calc function based on left and top available modes
178 *
179 * \param[in] top_intra_mode : Top available intra mode
180 * \param[in] left_intra_mode : Left available intra mode
181 * \param[in] available_top : Top availability flag
182 * \param[in] available_left : Left availability flag
183 * \param[in] cu_pos_y : cu position wrt to CTB
184 * \param[in] mode_bits_cost : pointer to mode bits buffer
185 * \param[in] lambda : Lambda value (SAD/SATD)
186 * \param[in] cand_mode_list : pointer to candidate list buffer
187 *
188 * \return
189 * None
190 *
191 * \author
192 * Ittiam
193 *
194 *****************************************************************************
195 */
ihevce_intra_populate_mode_bits_cost_bracketing(WORD32 top_intra_mode,WORD32 left_intra_mode,WORD32 available_top,WORD32 available_left,WORD32 cu_pos_y,UWORD16 * mode_bits_cost,UWORD16 * mode_bits,WORD32 lambda,WORD32 * cand_mode_list)196 void ihevce_intra_populate_mode_bits_cost_bracketing(
197 WORD32 top_intra_mode,
198 WORD32 left_intra_mode,
199 WORD32 available_top,
200 WORD32 available_left,
201 WORD32 cu_pos_y,
202 UWORD16 *mode_bits_cost,
203 UWORD16 *mode_bits,
204 WORD32 lambda,
205 WORD32 *cand_mode_list)
206 {
207 /* local variables */
208 WORD32 i;
209 WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
210
211 UWORD16 one_bits_cost =
212 COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda
213 UWORD16 two_bits_cost =
214 COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda
215 UWORD16 five_bits_cost =
216 COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda
217
218 for(i = 0; i < 35; i++)
219 {
220 mode_bits_cost[i] = five_bits_cost;
221 mode_bits[i] = 5;
222 }
223
224 /* EIID: set availability flag to zero if modes are invalid.
225 Required since some CU's might be skipped (though available)
226 and their modes will be set to 255 (-1)*/
227 if(35 < top_intra_mode || 0 > top_intra_mode)
228 available_top = 0;
229 if(35 < left_intra_mode || 0 > left_intra_mode)
230 available_left = 0;
231
232 /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
233 /* N = top */
234 if(0 == available_top)
235 {
236 cand_intra_pred_mode_top = INTRA_DC;
237 }
238 /* for neighbour != INTRA, setting DC is done outside */
239 else if(0 == cu_pos_y) /* It's on the CTB boundary */
240 {
241 cand_intra_pred_mode_top = INTRA_DC;
242 }
243 else
244 {
245 cand_intra_pred_mode_top = top_intra_mode;
246 }
247
248 /* N = left */
249 if(0 == available_left)
250 {
251 cand_intra_pred_mode_left = INTRA_DC;
252 //cand_intra_pred_mode_left = cand_intra_pred_mode_top;
253 }
254 /* for neighbour != INTRA, setting DC is done outside */
255 else
256 {
257 cand_intra_pred_mode_left = left_intra_mode;
258 }
259
260 /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
261 if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
262 {
263 if(cand_intra_pred_mode_left < 2)
264 {
265 cand_mode_list[0] = INTRA_PLANAR;
266 cand_mode_list[1] = INTRA_DC;
267 cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
268 }
269 else
270 {
271 cand_mode_list[0] = cand_intra_pred_mode_left;
272 cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
273 cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
274 }
275 }
276 else
277 {
278 if(0 == available_left)
279 {
280 cand_mode_list[0] = cand_intra_pred_mode_top;
281 cand_mode_list[1] = cand_intra_pred_mode_left;
282 }
283 else
284 {
285 cand_mode_list[0] = cand_intra_pred_mode_left;
286 cand_mode_list[1] = cand_intra_pred_mode_top;
287 }
288 if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
289 (cand_intra_pred_mode_top != INTRA_PLANAR))
290 {
291 cand_mode_list[2] = INTRA_PLANAR;
292 }
293 else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
294 {
295 cand_mode_list[2] = INTRA_DC;
296 }
297 else
298 {
299 cand_mode_list[2] = INTRA_ANGULAR(26);
300 }
301 }
302 mode_bits_cost[cand_mode_list[0]] = one_bits_cost;
303 mode_bits_cost[cand_mode_list[1]] = two_bits_cost;
304 mode_bits_cost[cand_mode_list[2]] = two_bits_cost;
305
306 mode_bits[cand_mode_list[0]] = 2;
307 mode_bits[cand_mode_list[1]] = 3;
308 mode_bits[cand_mode_list[2]] = 3;
309 }
310
311 /*!
312 ******************************************************************************
313 * \if Function name : ihevce_pu_calc_4x4_blk \endif
314 *
315 * \brief
316 * 4x4 pu (8x8 CU) mode decision using step 8421 method
317 *
318 * \param[in] ps_cu_node : pointer to cu node info buffer
319 * \param[in] pu1_src : pointer to src pixels
320 * \param[in] src_stride : frm source stride
321 * \param[in] ref : pointer to reference pixels for prediction
322 * \param[in] cand_mode_list : pointer to candidate list buffer
323 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer
324 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer
325 *
326 * \return
327 * None
328 *
329 * \author
330 * Ittiam
331 *
332 *****************************************************************************
333 */
ihevce_pu_calc_4x4_blk(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,UWORD8 * pu1_src,WORD32 src_stride,UWORD8 * ref,UWORD16 * mode_bits_cost,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,func_selector_t * ps_func_selector)334 void ihevce_pu_calc_4x4_blk(
335 ihevce_ipe_ctxt_t *ps_ctxt,
336 ihevce_ipe_cu_tree_t *ps_cu_node,
337 UWORD8 *pu1_src,
338 WORD32 src_stride,
339 UWORD8 *ref,
340 UWORD16 *mode_bits_cost,
341 WORD32 *best_costs_4x4,
342 UWORD8 *best_modes_4x4,
343 func_selector_t *ps_func_selector)
344 {
345 WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp;
346 WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out;
347 UWORD8 u1_use_satd = ps_ctxt->u1_use_satd;
348 UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on;
349
350 WORD32 i, j = 0, i_end;
351 UWORD8 mode, best_amode = 255;
352 UWORD8 pred[16];
353
354 UWORD16 sad;
355 WORD32 sad_cost = 0;
356 WORD32 best_asad_cost = 0xFFFFF;
357 WORD32 temp;
358 UWORD8 modes_to_eval[5];
359 WORD32 costs_4x4[5];
360 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
361
362 /* LO resolution hence low resolution disable */
363 WORD32 u1_low_resol = 0;
364 UWORD8 au1_best_modes[1] = { 0 };
365 WORD32 ai4_best_sad_costs[1] = { 0 };
366
367 WORD16 *pi2_tmp = &pi2_trans_tmp[0];
368
369 ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list =
370 &ps_ctxt->s_ipe_optimised_function_list;
371
372 //apf_resd_trns[0] = &ihevc_resi_trans_4x4_ttype1;
373 //apf_resd_trns[0] = &ihevc_HAD_4x4_8bit;
374
375 for(i = 0; i < 5; i++)
376 {
377 costs_4x4[i] = MAX_INTRA_COST_IPE;
378 }
379
380 ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
381 pu1_src,
382 src_stride,
383 ref,
384 mode_bits_cost,
385 au1_best_modes,
386 ai4_best_sad_costs,
387 u1_low_resol,
388 ps_ipe_optimised_function_list->pf_4x4_sad_computer);
389
390 best_amode = au1_best_modes[0];
391 best_asad_cost = ai4_best_sad_costs[0];
392
393 ASSERT(best_amode != 255);
394 /* Around best level 4 angular mode, search for best level 2 mode */
395 modes_to_eval[0] = best_amode - 2;
396 modes_to_eval[1] = best_amode + 2;
397 i = 0;
398 i_end = 2;
399 if(best_amode == 2)
400 i = 1;
401 else if(best_amode == 34)
402 i_end = 1;
403 for(; i < i_end; i++)
404 {
405 mode = modes_to_eval[i];
406
407 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
408
409 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
410
411 sad_cost = sad;
412 sad_cost += mode_bits_cost[mode];
413
414 if(sad_cost < best_asad_cost)
415 {
416 best_amode = mode;
417 best_asad_cost = sad_cost;
418 }
419 }
420
421 /* Around best level 2 angular mode, search for best level 1 mode */
422 /* Also evaluate for non-angular mode */
423
424 i = 0;
425 /*Level 1 refinement is disabled for ES preset */
426 if(1 == u1_level_1_refine_on)
427 {
428 if(best_amode != 2)
429 modes_to_eval[i++] = best_amode - 1;
430 modes_to_eval[i++] = best_amode;
431 }
432
433 modes_to_eval[i++] = 0;
434 modes_to_eval[i++] = 1;
435
436 if(1 == u1_level_1_refine_on)
437 {
438 if(best_amode != 34)
439 modes_to_eval[i++] = best_amode + 1;
440 }
441 i_end = i;
442 i = 0;
443
444 for(; i < i_end; i++)
445 {
446 mode = modes_to_eval[i];
447
448 g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
449
450 /* Hard coding to use SATD */
451 if(u1_use_satd)
452 {
453 ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
454 pu1_src, &pred[0], (WORD32 *)pi2_tmp, pi2_trans_out, src_stride, 4, 4, NULL_PLANE);
455
456 sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
457 }
458 else
459 {
460 sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
461 pu1_src, &pred[0], src_stride, 4);
462 }
463 sad_cost = sad;
464 sad_cost += mode_bits_cost[mode];
465
466 costs_4x4[i] = sad_cost;
467 }
468
469 /* Arrange the reference array in ascending order */
470 for(i = 0; i < (i_end - 1); i++)
471 {
472 for(j = i + 1; j < i_end; j++)
473 {
474 if(costs_4x4[i] > costs_4x4[j])
475 {
476 temp = costs_4x4[i];
477 costs_4x4[i] = costs_4x4[j];
478 costs_4x4[j] = temp;
479
480 temp = modes_4x4[i];
481 modes_4x4[i] = modes_4x4[j];
482 modes_4x4[j] = temp;
483 }
484 }
485 }
486 for(i = 0; i < 3; i++)
487 {
488 best_costs_4x4[i] = costs_4x4[i];
489 best_modes_4x4[i] = modes_to_eval[modes_4x4[i]];
490 }
491
492 {
493 ps_cu_node->best_mode = best_modes_4x4[0];
494 ps_cu_node->best_cost = best_costs_4x4[0];
495 ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode];
496 }
497 }
498
499 /*!
500 ******************************************************************************
501 * \if Function name : ihevce_pu_calc_8x8_blk \endif
502 *
503 * \brief
504 * 4x4 pu (8x8 CU) mode decision loop using step 8421 method
505 *
506 * \param[in] ps_curr_src : pointer to src pixels struct
507 * \param[in] ps_ctxt : pointer to IPE context struct
508 * \param[in] ps_cu_node : pointer to cu node info buffer
509 *
510 * \return
511 * None
512 *
513 * \author
514 * Ittiam
515 *
516 *****************************************************************************
517 */
ihevce_pu_calc_8x8_blk(iv_enc_yuv_buf_t * ps_curr_src,ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,func_selector_t * ps_func_selector)518 void ihevce_pu_calc_8x8_blk(
519 iv_enc_yuv_buf_t *ps_curr_src,
520 ihevce_ipe_ctxt_t *ps_ctxt,
521 ihevce_ipe_cu_tree_t *ps_cu_node,
522 func_selector_t *ps_func_selector)
523 {
524 WORD32 i, j;
525 WORD32 nbr_flags;
526 nbr_avail_flags_t s_nbr;
527 WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1;
528
529 UWORD8 *pu1_src_4x4;
530 WORD32 xA, xB, yA, yB;
531 //WORD32 x, y, size;
532 WORD32 top_intra_mode;
533 WORD32 left_intra_mode;
534 // WORD8 *top_intra_mode_ptr;
535 // WORD8 *left_intra_mode_ptr;
536 UWORD8 *pu1_orig;
537 WORD32 src_strd = ps_curr_src->i4_y_strd;
538
539 WORD32 cu_pos_x = ps_cu_node->ps_parent->u2_x0 << 1;
540 WORD32 cu_pos_y = ps_cu_node->ps_parent->u2_y0 << 1;
541 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
542
543 ihevc_intra_pred_luma_ref_substitution_fptr =
544 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
545
546 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) +
547 ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) +
548 (ps_cu_node->ps_parent->u2_x0 << 3);
549 for(i = 0; i < 2; i++)
550 {
551 for(j = 0; j < 2; j++)
552 {
553 WORD32 cand_mode_list[3];
554 pu1_src_4x4 = pu1_orig + (i * trans_size * src_strd) + (j * trans_size);
555 /* get the neighbour availability flags */
556 nbr_flags = ihevce_get_nbr_intra(
557 &s_nbr,
558 ps_ctxt->pu1_ctb_nbr_map,
559 ps_ctxt->i4_nbr_map_strd,
560 cu_pos_x + ((j) * (trans_size >> 2)),
561 cu_pos_y + ((i) * (trans_size >> 2)),
562 trans_size >> 2);
563
564 /* call the function which populates sad cost for all the modes */
565 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + j;
566 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
567 xB = xA + 1;
568 yB = yA - 1;
569 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
570 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
571
572 ihevce_intra_populate_mode_bits_cost_bracketing(
573 top_intra_mode,
574 left_intra_mode,
575 s_nbr.u1_top_avail,
576 s_nbr.u1_left_avail,
577 ps_cu_node->ps_parent->u2_y0,
578 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
579 &ps_ctxt->au2_mode_bits_8x8_pu[0],
580 ps_ctxt->i4_ol_sad_lambda,
581 cand_mode_list);
582
583 /* call the function which populates ref data for intra predicion */
584 ihevc_intra_pred_luma_ref_substitution_fptr(
585 pu1_src_4x4 - src_strd - 1,
586 pu1_src_4x4 - src_strd,
587 pu1_src_4x4 - 1,
588 src_strd,
589 4,
590 nbr_flags,
591 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
592 0);
593
594 ihevce_pu_calc_4x4_blk(
595 ps_ctxt,
596 ps_cu_node->ps_sub_cu[(i * 2) + j],
597 pu1_src_4x4,
598 src_strd,
599 &ps_ctxt->au1_ref_8x8pu[i * 2 + j][0],
600 &ps_ctxt->au2_mode_bits_cost_8x8pu[i * 2 + j][0],
601 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au4_best_cost_1tu[0],
602 &ps_cu_node->ps_sub_cu[(i * 2) + j]->au1_best_mode_1tu[0],
603 ps_func_selector);
604
605 /*&au4_cost_4x4[i*2 + j][0],
606 &au1_modes_4x4[i*2 + j][0]);*/ //TTODO : mode will change for the four partition
607
608 ihevce_set_nbr_map(
609 ps_ctxt->pu1_ctb_nbr_map,
610 ps_ctxt->i4_nbr_map_strd,
611 cu_pos_x + ((j) * (trans_size >> 2)),
612 cu_pos_y + ((i) * (trans_size >> 2)),
613 (trans_size >> 2),
614 1);
615
616 xA = ((ps_cu_node->ps_parent->u2_x0 << 3) >> 2) + 1 + j;
617 yA = ((ps_cu_node->ps_parent->u2_y0 << 3) >> 2) + 1 + i;
618 ps_ctxt->au1_ctb_mode_map[yA][xA] = ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode;
619 ps_cu_node->ps_sub_cu[i * 2 + j]->u2_mode_bits_cost =
620 ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[i * 2 + j]->best_mode];
621 }
622 }
623 }
624
625 /*!
626 ******************************************************************************
627 * \if Function name : ihevce_bracketing_analysis \endif
628 *
* \brief
*    Interface function that evaluates the MAX CU and the MAX - 1 CU, with the
*    MAX CU size taken from the coarse resolution mode decision. Compares the
*    SATD/SAD cost between the 2 CUs and determines the actual CU size and the
*    best 3 modes to be given to rdopt.
*
* \param[in] ps_ctxt : pointer to IPE context struct
* \param[in] ps_cu_node : pointer to cu node info buffer
* \param[in] ps_curr_src : pointer to src pixels struct
* \param[in] ps_ctb_out : pointer to ip ctb out struct
* \param[in] ps_ed_l1_ctb : pointer to level 1 early decision struct
* \param[in] ps_ed_l2_ctb : pointer to level 2 early decision struct
* \param[in] ps_ed_ctb_l1 : pointer to level 1 CTB struct holding the 8x8 L1 IPE/ME SAD values
* \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct
*
* (ps_row_cu, pointer to cu analyse struct, is no longer a parameter; it is
*  commented out in the function signature.)
642 *
643 * \return
644 * None
645 *
646 * \author
647 * Ittiam
648 *
649 *****************************************************************************
650 */
ihevce_bracketing_analysis(ihevce_ipe_ctxt_t * ps_ctxt,ihevce_ipe_cu_tree_t * ps_cu_node,iv_enc_yuv_buf_t * ps_curr_src,ctb_analyse_t * ps_ctb_out,ihevce_ed_blk_t * ps_ed_l1_ctb,ihevce_ed_blk_t * ps_ed_l2_ctb,ihevce_ed_ctb_l1_t * ps_ed_ctb_l1,ipe_l0_ctb_analyse_for_me_t * ps_l0_ipe_out_ctb)651 void ihevce_bracketing_analysis(
652 ihevce_ipe_ctxt_t *ps_ctxt,
653 ihevce_ipe_cu_tree_t *ps_cu_node,
654 iv_enc_yuv_buf_t *ps_curr_src,
655 ctb_analyse_t *ps_ctb_out,
656 //cu_analyse_t *ps_row_cu,
657 ihevce_ed_blk_t *ps_ed_l1_ctb,
658 ihevce_ed_blk_t *ps_ed_l2_ctb,
659 ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
660 ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb)
661 {
662 WORD32 cu_pos_x = 0;
663 WORD32 cu_pos_y = 0;
664
665 UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width;
666 UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height;
667 WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3);
668 WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3);
669
670 ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb;
671 ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb;
672
673 WORD32 i;
674 WORD32 cand_mode_list[3];
675 //cu_analyse_t *ps_curr_cu = ps_row_cu;
676 WORD32 blk_cnt = 0;
677 WORD32 j = 0;
678 WORD32 merge_32x32_l1, merge_32x32_l2;
679
680 WORD32 i4_skip_intra_eval_32x32_l1;
681 //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block
682
683 WORD32 parent_cost = 0;
684 WORD32 child_cost[4] = { 0 };
685 WORD32 child_cost_least = 0;
686 WORD32 child_satd[4] = { 0 };
687 WORD32 x, y, size;
688 WORD32 merge_64x64 = 1;
689 UWORD8 au1_best_32x32_modes[4];
690 WORD32 au4_best_32x32_cost[4];
691 WORD32 parent_best_mode;
692 UWORD8 best_mode;
693
694 WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
695 /* flag to control 1CU-4TU modes based on quality preset */
696 /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */
697 WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
698 (i4_quality_preset == IHEVCE_QUALITY_P0);
699
700 /* flag to control 4CU-16TU mode based on quality preset */
701 /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/
702 WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
703 (i4_quality_preset == IHEVCE_QUALITY_P0);
704
705 WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2;
706 float f_strength;
707 /* Accumalte satd */
708 LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0;
709 WORD32 i4_ctb_acc_satd = 0;
710
711 /* Accumalate Mode bits cost */
712 LWORD64 i8_frame_acc_mode_bits_cost = 0;
713
714 /* Step2 is bypassed for parent, uses children modes*/
715 WORD32 step2_bypass = 1;
716
717 if(1 == ps_ctxt->u1_disable_child_cu_decide)
718 step2_bypass = 0;
719
720 ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree;
721 for(i = 0; i < 4; i++)
722 {
723 ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i;
724 }
725
726 /* Loop for all 8x8 block in a CTB */
727 ps_ctb_out->u4_cu_split_flags = 0x1;
728
729 /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */
730 for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++)
731 {
732 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE;
733 }
734
735 for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
736 {
737 ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE;
738 }
739
740 for(i = 0; i < (MAX_CU_IN_CTB); i++)
741 {
742 ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE;
743 }
744
745 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE;
746
747 /* by default 64x64 modes are set to default values DC and Planar */
748 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0;
749 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1;
750 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255;
751
752 /* by default 64x4 split is set to 1 */
753 ps_l0_ipe_out_ctb->u1_split_flag = 1;
754
755 /* Modulation factor calculated based on spatial variance instead of hardcoded val*/
756 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16;
757
758 f_strength = ps_ctxt->f_strength;
759
760 /* ------------------------------------------------ */
761 /* populate the early decisions done by L1 analysis */
762 /* ------------------------------------------------ */
763 for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
764 {
765 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i];
766 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i];
767 ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i];
768 ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[i] = ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i];
769 }
770
771 /* Init CTB level accumalated SATD and MPM bits */
772 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0;
773 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0;
774
775 /* ------------------------------------------------ */
776 /* Loop over all the blocks in current CTB */
777 /* ------------------------------------------------ */
778 {
779 /* 64 8x8 blocks should be encountered for the do,while loop to exit */
780 do
781 {
782 intra32_analyse_t *ps_intra32_analyse;
783 intra16_analyse_t *ps_intra16_analyse;
784 WORD32 *pi4_intra_32_cost;
785 WORD32 *pi4_intra_16_cost;
786 WORD32 *pi4_intra_8_cost;
787 WORD32 merge_16x16_l1;
788
789 /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */
790 cu_pos_x = gau1_cu_pos_x[blk_cnt];
791 cu_pos_y = gau1_cu_pos_y[blk_cnt];
792
793 /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */
794 au1_best_32x32_modes[blk_cnt >> 4] = 255;
795
796 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
797 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
798 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4];
799
800 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
801 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
802 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2];
803
804 /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */
805 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
806 {
807 /* Reset to zero for every cu decision */
808 merge_32x32_l1 = 0;
809
810 child_cost_least = 0;
811
812 /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */
813 ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2);
814
815 pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4];
816
817 /* by default 32x32 modes are set to default values DC and Planar */
818 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0;
819 ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1;
820 ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255;
821
822 /* By default 32x32 split is set to 1 */
823 ps_intra32_analyse->b1_split_flag = 1;
824
825 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0;
826 ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1;
827 ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255;
828
829 /* 16x16 cost & 8x8 cost are stored in Raster scan order */
830 /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */
831 /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */
832 {
833 WORD32 pos_x_8x8, pos_y_8x8;
834
835 pos_x_8x8 = gau1_cu_pos_x[blk_cnt];
836 pos_y_8x8 = gau1_cu_pos_y[blk_cnt];
837
838 pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0];
839
840 pi4_intra_16_cost +=
841 ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1)));
842
843 pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0];
844
845 pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW));
846 }
847
848 merge_32x32_l1 = 0;
849 merge_32x32_l2 = 0;
850 i4_skip_intra_eval_32x32_l1 = 0;
851
852 /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */
853 merge_16x16_l1 = 0;
854 if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2))
855 {
856 #if !ENABLE_UNIFORM_CU_SIZE_8x8
857 merge_16x16_l1 = ps_ed_blk_l1->merge_success;
858 #else
859 merge_16x16_l1 = 0;
860 #endif
861 }
862
863 /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */
864 if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4))
865 {
866 /* Check 4 flags of L1(8x8) say merge */
867 for(i = 0; i < 4; i++)
868 {
869 merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success;
870
871 //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval
872 i4_skip_intra_eval_32x32_l1 +=
873 ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0;
874 }
875
876 #if !ENABLE_UNIFORM_CU_SIZE_8x8
877 /* Check 1 flag from L2(16x16) say merge */
878 merge_32x32_l2 = ps_ed_blk_l2->merge_success;
879 #else
880 merge_32x32_l1 = 0;
881 merge_32x32_l2 = 0;
882 #endif
883 }
884
885 #if DISABLE_L2_IPE_IN_PB_L1_IN_B
886 if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE))
887 {
888 merge_32x32_l2 = 0;
889 ps_ed_blk_l2->merge_success = 0;
890 }
891 #endif
892
893 ps_intra32_analyse->b1_valid_cu = 1;
894
895 /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */
896 /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval,
897 if no blocks to be skipped then eval entire 32x32,
898 else break the merge and go to 16x16 level eval */
899 if((merge_32x32_l1 == 4) && merge_32x32_l2 &&
900 ((i4_skip_intra_eval_32x32_l1 == 0) ||
901 (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge
902 )
903 {
904 #if IP_DBG_L1_l2
905 /* Populate params for 32x32 block analysis */
906 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
907
908 ps_cu_node->ps_parent->u1_cu_size = 32;
909 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
910 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
911 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode;
912 /* CU size 32x32 and fill the final cu params */
913
914 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
915
916 /* Increment pointers */
917 ps_ed_blk_l1 += 16;
918 blk_cnt += 16;
919 ps_row_cu++;
920 merge_64x64 &= 1;
921 #else
922
923 /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/
924 if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE))
925 {
926 WORD32 i4_local_ctr1, i4_local_ctr2;
927
928 ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
929
930 ps_cu_node->ps_parent->u1_cu_size = 32;
931 ps_cu_node->ps_parent->u2_x0 =
932 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
933 ps_cu_node->ps_parent->u2_y0 =
934 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
935 ps_cu_node->ps_parent->best_mode =
936 INTRA_DC; //ps_ed_blk_l2->best_merge_mode;
937 /* CU size 32x32 and fill the final cu params */
938
939 /* fill in the first modes as invalid */
940 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
941 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
942 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
943 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
944
945 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
946 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
947 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
948
949 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
950
951 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
952 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
953
954 ps_intra32_analyse->b1_valid_cu = 0;
955 ps_intra32_analyse->b1_split_flag = 0;
956 ps_intra32_analyse->b1_merge_flag = 0;
957 /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu,
958 255,
959 NUM_BEST_MODES);
960 memset (&ps_intra32_analyse->au1_best_modes_16x16_tu,
961 255,
962 NUM_BEST_MODES);*/
963 //set only first mode since if it's 255. it wont go ahead
964 ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255;
965 ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255;
966
967 *pi4_intra_32_cost = MAX_INTRA_COST_IPE;
968
969 /*since ME will start evaluating from bottom up, set the lower
970 cu size data invalid */
971 for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++)
972 {
973 WORD32 *pi4_intra_8_cost_curr16;
974
975 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
976 .au1_best_modes_16x16_tu[0] = 255;
977 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
978 .au1_best_modes_8x8_tu[0] = 255;
979 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0;
980 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0;
981 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0;
982
983 pi4_intra_16_cost
984 [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) *
985 (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE;
986
987 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1);
988 pi4_intra_8_cost_curr16 +=
989 ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW;
990
991 for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++)
992 {
993 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
994 .as_intra8_analyse[i4_local_ctr2]
995 .au1_4x4_best_modes[0][0] = 255;
996 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
997 .as_intra8_analyse[i4_local_ctr2]
998 .au1_4x4_best_modes[1][0] = 255;
999 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1000 .as_intra8_analyse[i4_local_ctr2]
1001 .au1_4x4_best_modes[2][0] = 255;
1002 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1003 .as_intra8_analyse[i4_local_ctr2]
1004 .au1_4x4_best_modes[3][0] = 255;
1005 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1006 .as_intra8_analyse[i4_local_ctr2]
1007 .au1_best_modes_8x8_tu[0] = 255;
1008 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1009 .as_intra8_analyse[i4_local_ctr2]
1010 .au1_best_modes_4x4_tu[0] = 255;
1011 ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
1012 .as_intra8_analyse[i4_local_ctr2]
1013 .b1_valid_cu = 0;
1014
1015 pi4_intra_8_cost_curr16
1016 [(i4_local_ctr2 & 1) +
1017 (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] =
1018 MAX_INTRA_COST_IPE;
1019 }
1020 }
1021
1022 /* set neighbours even if intra is not evaluated, since source is always available. */
1023 ihevce_set_nbr_map(
1024 ps_ctxt->pu1_ctb_nbr_map,
1025 ps_ctxt->i4_nbr_map_strd,
1026 ps_cu_node->ps_parent->u2_x0 << 1,
1027 ps_cu_node->ps_parent->u2_y0 << 1,
1028 (ps_cu_node->ps_parent->u1_cu_size >> 2),
1029 1);
1030
1031 /* cost accumulation of best cu size candidate */
1032 /*i8_frame_acc_satd_cost += parent_cost;*/
1033
1034 /* Mode bits cost accumulation for best cu size and cu mode */
1035 /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/
1036
1037 /*satd/mod_qp accumulation of best cu */
1038 /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/
1039
1040 /* Increment pointers */
1041 ps_ed_blk_l1 += 16;
1042 blk_cnt += 16;
1043 //ps_row_cu++;
1044 merge_64x64 = 0;
1045
1046 /* increment for stat purpose only. Increment is valid only on single thread */
1047 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4;
1048 }
1049 else
1050 {
1051 /* Revaluation of 4 16x16 blocks at 8x8 prediction level */
1052 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1053
1054 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1055 (ps_ctxt->i4_slice_type == PSLICE))
1056 {
1057 ps_ctxt->u1_disable_child_cu_decide = 1;
1058 step2_bypass = 0;
1059 }
1060
1061 /* Based on the flag, Child modes decision can be disabled*/
1062 if(0 == ps_ctxt->u1_disable_child_cu_decide)
1063 {
1064 for(j = 0; j < 4; j++)
1065 {
1066 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1067 gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */
1068 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1069 gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */
1070 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16;
1071
1072 {
1073 WORD32 best_ang_mode =
1074 (ps_ed_blk_l1 + (j * 4))->best_merge_mode;
1075
1076 if(best_ang_mode < 2)
1077 best_ang_mode = 26;
1078
1079 ihevce_mode_eval_filtering(
1080 ps_cu_node->ps_sub_cu[j],
1081 ps_cu_node,
1082 ps_ctxt,
1083 ps_curr_src,
1084 best_ang_mode,
1085 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1086 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1087 !step2_bypass,
1088 1);
1089
1090 if(i4_enable_4cu_16tu)
1091 {
1092 ihevce_mode_eval_filtering(
1093 ps_cu_node->ps_sub_cu[j],
1094 ps_cu_node,
1095 ps_ctxt,
1096 ps_curr_src,
1097 best_ang_mode,
1098 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1099 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1100 !step2_bypass,
1101 0);
1102 }
1103 else
1104 {
1105 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1106 memcpy(
1107 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1108 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1109 NUM_BEST_MODES);
1110
1111 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1112 memcpy(
1113 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1114 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1115 NUM_BEST_MODES * sizeof(WORD32));
1116 }
1117
1118 child_cost[j] =
1119 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1120 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1121
1122 /* Child cost is sum of costs at 16x16 level */
1123 child_cost_least += child_cost[j];
1124
1125 /* Select the best mode to be populated as top and left nbr depending on the
1126 4tu and 1tu cost */
1127 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1128 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1129 {
1130 ps_cu_node->ps_sub_cu[j]->best_mode =
1131 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1132 }
1133 else
1134 {
1135 ps_cu_node->ps_sub_cu[j]->best_mode =
1136 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1137 }
1138
1139 { /* Update the CTB nodes only for MAX - 1 CU nodes */
1140 WORD32 xA, yA, row, col;
1141 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1142 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1143 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1144 for(row = yA; row < (yA + size); row++)
1145 {
1146 for(col = xA; col < (xA + size); col++)
1147 {
1148 ps_ctxt->au1_ctb_mode_map[row][col] =
1149 ps_cu_node->ps_sub_cu[j]->best_mode;
1150 }
1151 }
1152 }
1153 }
1154
1155 /*Child SATD cost*/
1156 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1157
1158 /* store the child 16x16 costs */
1159 pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] =
1160 child_cost[j];
1161
1162 /* set the CU valid flag */
1163 ps_intra16_analyse[j].b1_valid_cu = 1;
1164
1165 /* All 16x16 merge is valid, if Cu 32x32 is chosen */
1166 /* To be reset, if CU 64x64 is chosen */
1167 ps_intra16_analyse[j].b1_merge_flag = 1;
1168
1169 /* storing the modes to intra 16 analyse */
1170 /* store the best 16x16 modes 8x8 tu */
1171 memcpy(
1172 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1173 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1174 sizeof(UWORD8) * (NUM_BEST_MODES));
1175 ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1176
1177 /* store the best 16x16 modes 16x16 tu */
1178 memcpy(
1179 &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0],
1180 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1181 sizeof(UWORD8) * (NUM_BEST_MODES));
1182 ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1183
1184 /* divide the 16x16 costs (pro rating) to 4 8x8 costs */
1185 /* store the same 16x16 modes as 4 8x8 child modes */
1186 {
1187 WORD32 idx_8x8;
1188 WORD32 *pi4_intra_8_cost_curr16;
1189 intra8_analyse_t *ps_intra8_analyse;
1190
1191 pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1);
1192 pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW;
1193
1194 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1195 {
1196 pi4_intra_8_cost_curr16
1197 [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] =
1198 (child_cost[j] + 3) >> 2;
1199
1200 ps_intra8_analyse =
1201 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1202
1203 ps_intra8_analyse->b1_enable_nxn = 0;
1204 ps_intra8_analyse->b1_valid_cu = 1;
1205
1206 /* store the best 8x8 modes 8x8 tu */
1207 memcpy(
1208 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1209 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1210 sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1211
1212 /* store the best 8x8 modes 4x4 tu */
1213 memcpy(
1214 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1215 &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
1216 sizeof(UWORD8) * (NUM_BEST_MODES + 1));
1217
1218 /* NXN modes not evaluated hence set to 255 */
1219 memset(
1220 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1221 255,
1222 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1223 }
1224 }
1225 }
1226
1227 ihevce_set_nbr_map(
1228 ps_ctxt->pu1_ctb_nbr_map,
1229 ps_ctxt->i4_nbr_map_strd,
1230 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1231 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1232 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1233 0);
1234 }
1235 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1236 else
1237 {
1238 for(j = 0; j < 4; j++)
1239 {
1240 WORD32 idx_8x8;
1241 intra8_analyse_t *ps_intra8_analyse;
1242 ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255;
1243 ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255;
1244
1245 ps_intra16_analyse[j].b1_valid_cu = 0;
1246
1247 for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
1248 {
1249 ps_intra8_analyse =
1250 &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
1251
1252 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1253 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1254
1255 ps_intra8_analyse->b1_enable_nxn = 0;
1256 ps_intra8_analyse->b1_valid_cu = 0;
1257
1258 /* NXN modes not evaluated hence set to 255 */
1259 memset(
1260 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1261 255,
1262 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1263 }
1264 }
1265
1266 child_cost_least = MAX_INTRA_COST_IPE;
1267 }
1268 #endif
1269
1270 /* Populate params for 32x32 block analysis */
1271
1272 ps_cu_node->ps_parent->u1_cu_size = 32;
1273 ps_cu_node->ps_parent->u2_x0 =
1274 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1275 ps_cu_node->ps_parent->u2_y0 =
1276 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1277
1278 /* Revaluation for 32x32 parent block at 16x16 prediction level */
1279 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1280
1281 {
1282 /* Eval for TUSize = CuSize */
1283 ihevce_mode_eval_filtering(
1284 ps_cu_node->ps_parent,
1285 ps_cu_node,
1286 ps_ctxt,
1287 ps_curr_src,
1288 26,
1289 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1290 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1291 step2_bypass,
1292 1);
1293
1294 if(i4_enable_1cu_4tu)
1295 {
1296 /* Eval for TUSize = CuSize/2 */
1297 ihevce_mode_eval_filtering(
1298 ps_cu_node->ps_parent,
1299 ps_cu_node,
1300 ps_ctxt,
1301 ps_curr_src,
1302 26,
1303 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1304 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1305 step2_bypass,
1306 0);
1307 }
1308 else
1309 {
1310 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1311 memcpy(
1312 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1313 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1314 NUM_BEST_MODES);
1315
1316 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1317 memcpy(
1318 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1319 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1320 NUM_BEST_MODES * sizeof(WORD32));
1321 }
1322 }
1323
1324 ps_ctxt->u1_disable_child_cu_decide = 0;
1325 step2_bypass = 1;
1326
1327 /* Update parent cost */
1328 parent_cost =
1329 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1330 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1331
1332 /* Select the best mode to be populated as top and left nbr depending on the
1333 4tu and 1tu cost */
1334 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1335 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1336 {
1337 ps_cu_node->ps_parent->best_mode =
1338 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1339 }
1340 else
1341 {
1342 ps_cu_node->ps_parent->best_mode =
1343 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1344 }
1345
1346 /* store the 32x32 cost */
1347 *pi4_intra_32_cost = parent_cost;
1348
1349 /* set the CU valid flag */
1350 ps_intra32_analyse->b1_valid_cu = 1;
1351
1352 ps_intra32_analyse->b1_merge_flag = 1;
1353
1354 /* storing the modes to intra 32 analyse */
1355 {
1356 /* store the best 32x32 modes 16x16 tu */
1357 memcpy(
1358 &ps_intra32_analyse->au1_best_modes_16x16_tu[0],
1359 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1360 sizeof(UWORD8) * (NUM_BEST_MODES));
1361 ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1362
1363 /* store the best 32x32 modes 32x32 tu */
1364 memcpy(
1365 &ps_intra32_analyse->au1_best_modes_32x32_tu[0],
1366 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1367 sizeof(UWORD8) * (NUM_BEST_MODES));
1368 ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255;
1369 }
1370 parent_best_mode = ps_cu_node->ps_parent->best_mode;
1371 if((parent_cost <=
1372 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1373 LAMBDA_Q_SHIFT))) //|| identical_modes)
1374 {
1375 WORD32 i4_q_scale_q3_mod;
1376 UWORD8 u1_cu_possible_qp;
1377 WORD32 i4_act_factor;
1378
1379 /* CU size 32x32 and fill the final cu params */
1380
1381 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1382
1383 if((IHEVCE_QUALITY_P3 > i4_quality_preset))
1384 {
1385 for(i = 0; i < 4; i++)
1386 {
1387 intra8_analyse_t *ps_intra8_analyse;
1388 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
1389 for(j = 0; j < 4; j++)
1390 {
1391 /* Populate best 3 nxn modes */
1392 ps_intra8_analyse->au1_4x4_best_modes[j][0] =
1393 ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0];
1394 ps_intra8_analyse->au1_4x4_best_modes[j][1] =
1395 ps_cu_node->ps_sub_cu[i]
1396 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
1397 ps_intra8_analyse->au1_4x4_best_modes[j][2] =
1398 ps_cu_node->ps_sub_cu[i]
1399 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
1400 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
1401 }
1402 }
1403 }
1404 /* store the 32x32 non split flag */
1405 ps_intra32_analyse->b1_split_flag = 0;
1406 ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0;
1407 ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0;
1408 ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0;
1409 ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0;
1410
1411 au1_best_32x32_modes[blk_cnt >> 4] =
1412 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1413
1414 au4_best_32x32_cost[blk_cnt >> 4] =
1415 ps_cu_node->ps_parent->au4_best_cost_1tu[0];
1416 /*As 32*32 has won, pick L2 8x8 qp which maps
1417 to L0 32x32 Qp*/
1418 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1419 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1420 u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1421 ps_ctxt->i4_qscale,
1422 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
1423 ps_ctxt->ld_curr_frame_16x16_log_avg[0],
1424 f_strength,
1425 &i4_act_factor,
1426 &i4_q_scale_q3_mod,
1427 ps_ctxt->ps_rc_quant_ctxt);
1428 /* cost accumulation of best cu size candidate */
1429 i8_frame_acc_satd_cost += parent_cost;
1430
1431 /* satd and mpm bits accumulation of best cu size candidate */
1432 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
1433
1434 /* Mode bits cost accumulation for best cu size and cu mode */
1435 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
1436
1437 /*satd/mod_qp accumulation of best cu */
1438 i8_frame_acc_satd_by_modqp_q10 +=
1439 ((LWORD64)ps_cu_node->ps_parent->best_satd
1440 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1441 i4_q_scale_q3_mod;
1442
1443 /* Increment pointers */
1444 ps_ed_blk_l1 += 16;
1445 blk_cnt += 16;
1446 //ps_row_cu++;
1447 merge_64x64 &= 1;
1448 }
1449 else
1450 {
1451 /* store the 32x32 split flag */
1452 ps_intra32_analyse->b1_split_flag = 1;
1453
1454 /* CU size 16x16 and fill the final cu params for all 4 blocks */
1455 for(j = 0; j < 4; j++)
1456 {
1457 WORD32 i4_q_scale_q3_mod;
1458 UWORD8 u1_cu_possible_qp;
1459 WORD32 i4_act_factor;
1460
1461 /* Set CU split flag */
1462 ASSERT(blk_cnt % 4 == 0);
1463
1464 ihevce_update_cand_list(
1465 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
1466
1467 /* store the 16x16 non split flag */
1468 ps_intra16_analyse[j].b1_split_flag = 0;
1469
1470 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1471 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1472 /*As 16*16 has won, pick L1 8x8 qp which maps
1473 to L0 16x16 Qp*/
1474 u1_cu_possible_qp = ihevce_cu_level_qp_mod(
1475 ps_ctxt->i4_qscale,
1476 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1477 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1478 f_strength,
1479 &i4_act_factor,
1480 &i4_q_scale_q3_mod,
1481 ps_ctxt->ps_rc_quant_ctxt);
1482
1483 /*accum satd/qp for all child block*/
1484 i8_frame_acc_satd_by_modqp_q10 +=
1485 ((LWORD64)child_satd[j]
1486 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1487 i4_q_scale_q3_mod;
1488
1489 /* Accumulate mode bits for all child blocks */
1490 i8_frame_acc_mode_bits_cost +=
1491 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
1492
1493 /* satd and mpm bits accumulation of best cu size candidate */
1494 i4_ctb_acc_satd += child_satd[j];
1495
1496 /* Increment pointers */
1497 //ps_row_cu++;
1498 ps_ed_blk_l1 += 4;
1499 blk_cnt += 4;
1500 }
1501
1502 /* cost accumulation of best cu size candidate */
1503 i8_frame_acc_satd_cost += child_cost_least;
1504
1505 /* 64x64 merge is not possible */
1506 merge_64x64 = 0;
1507 }
1508
1509 //ps_ed_blk_l2 += 4;
1510
1511 } //end of EIID's else
1512 #endif
1513 }
1514 /* If Merge success for L1 max CU size 16x16 is chosen */
1515 else if(merge_16x16_l1)
1516 {
1517 #if IP_DBG_L1_l2
1518 ps_cu_node->ps_parent->u1_cu_size = 16;
1519 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1520 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1521 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode;
1522 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1523
1524 blk_cnt += 4;
1525 ps_ed_blk_l1 += 4;
1526 ps_row_cu++;
1527 merge_64x64 = 0;
1528 #else
1529
1530 /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/
1531 /* the skip applies to every non-I slice (condition is i4_slice_type != ISLICE), not only B pictures */
1532 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
1533 {
1534 WORD32 i4_q_scale_q3_mod, i4_local_ctr;
1535 WORD8 i1_cu_possible_qp;
1536 WORD32 i4_act_factor;
1537 /* make cost infinity. */
1538 /* make modes invalid */
1539 /* update loop variables */
1540 /* set other output variables */
1541 /* dont set neighbour flag so that next blocks wont access this cu */
1542 /* what happens to ctb_mode_map?? */
1543
1544 ps_cu_node->ps_parent->u1_cu_size = 16;
1545 ps_cu_node->ps_parent->u2_x0 =
1546 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1547 ps_cu_node->ps_parent->u2_y0 =
1548 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1549 ps_cu_node->ps_parent->best_mode =
1550 INTRA_DC; //ps_ed_blk_l1->best_merge_mode;
1551
1552 /* fill in the first modes as invalid */
1553
1554 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
1555 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
1556 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
1557 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
1558
1559 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
1560 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
1561 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
1562
1563 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1564
1565 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
1566 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
1567
1568 ps_intra32_analyse->b1_split_flag = 1;
1569 ps_intra32_analyse->b1_merge_flag = 0;
1570
1571 ps_intra16_analyse->b1_valid_cu = 0;
1572 ps_intra16_analyse->b1_split_flag = 0;
1573 ps_intra16_analyse->b1_merge_flag = 1;
1574 //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu,
1575 // 255,
1576 // NUM_BEST_MODES);
1577 //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu,
1578 // 255,
1579 // NUM_BEST_MODES);
1580 //set only first mode since if it's 255. it wont go ahead
1581 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
1582 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
1583 *pi4_intra_16_cost = MAX_INTRA_COST_IPE;
1584
1585 /*since ME will start evaluating from bottom up, set the lower
1586 cu size data invalid */
1587 for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++)
1588 {
1589 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1590 .au1_4x4_best_modes[0][0] = 255;
1591 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1592 .au1_4x4_best_modes[1][0] = 255;
1593 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1594 .au1_4x4_best_modes[2][0] = 255;
1595 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1596 .au1_4x4_best_modes[3][0] = 255;
1597 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1598 .au1_best_modes_8x8_tu[0] = 255;
1599 ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
1600 .au1_best_modes_4x4_tu[0] = 255;
1601
1602 pi4_intra_8_cost
1603 [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] =
1604 MAX_INTRA_COST_IPE;
1605 }
1606
1607 /* set neighbours even if intra is not evaluated, since source is always available. */
1608 ihevce_set_nbr_map(
1609 ps_ctxt->pu1_ctb_nbr_map,
1610 ps_ctxt->i4_nbr_map_strd,
1611 ps_cu_node->ps_parent->u2_x0 << 1,
1612 ps_cu_node->ps_parent->u2_y0 << 1,
1613 (ps_cu_node->ps_parent->u1_cu_size >> 2),
1614 1);
1615
1616 //what happens to RC variables??
1617 /* run only constant Qp */
1618 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1619 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
1620 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1621 ps_ctxt->i4_qscale,
1622 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
1623 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1624 f_strength,
1625 &i4_act_factor,
1626 &i4_q_scale_q3_mod,
1627 ps_ctxt->ps_rc_quant_ctxt);
1628
1629 /* cost accumulation of best cu size candidate */
1630 i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation
1631
1632 /*satd/mod_qp accumulation of best cu */
1633 i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation
1634 //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod;
1635
1636 /* Accumulate mode bits for all child blocks */
1637 i8_frame_acc_mode_bits_cost +=
1638 0; //ps_cu_node->ps_parent->u2_mode_bits_cost;
1639 //incorrect accumulation
1640
1641 blk_cnt += 4;
1642 ps_ed_blk_l1 += 4;
1643 //ps_row_cu++;
1644 merge_64x64 = 0;
1645
1646 /* increment for stat purpose only. Increment is valid only on single thread */
1647 ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1;
1648 }
1649 else
1650 {
1651 /* 64x64 merge is not possible */
1652 merge_64x64 = 0;
1653
1654 /* set the 32x32 split flag to 1 */
1655 ps_intra32_analyse->b1_split_flag = 1;
1656
1657 ps_intra32_analyse->b1_merge_flag = 0;
1658
1659 ps_intra16_analyse->b1_merge_flag = 1;
1660
1661 if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
1662 (ps_ctxt->i4_slice_type == PSLICE))
1663 {
1664 ps_ctxt->u1_disable_child_cu_decide = 1;
1665 step2_bypass = 0;
1666 }
1667 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1668 /* Based on the flag, Child modes decision can be disabled*/
1669 if(0 == ps_ctxt->u1_disable_child_cu_decide)
1670 {
1671 for(j = 0; j < 4; j++)
1672 {
1673 intra8_analyse_t *ps_intra8_analyse;
1674 WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode;
1675
1676 if(best_ang_mode < 2)
1677 best_ang_mode = 26;
1678
1679 //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE;
1680 //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode;
1681
1682 ps_cu_node->ps_sub_cu[j]->u2_x0 =
1683 gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */
1684 ps_cu_node->ps_sub_cu[j]->u2_y0 =
1685 gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */
1686 ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8;
1687
1688 ihevce_mode_eval_filtering(
1689 ps_cu_node->ps_sub_cu[j],
1690 ps_cu_node,
1691 ps_ctxt,
1692 ps_curr_src,
1693 best_ang_mode,
1694 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1695 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1696 !step2_bypass,
1697 1);
1698
1699 if(i4_enable_4cu_16tu)
1700 {
1701 ihevce_mode_eval_filtering(
1702 ps_cu_node->ps_sub_cu[j],
1703 ps_cu_node,
1704 ps_ctxt,
1705 ps_curr_src,
1706 best_ang_mode,
1707 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1708 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1709 !step2_bypass,
1710 0);
1711 }
1712 else
1713 {
1714 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1715 memcpy(
1716 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1717 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1718 NUM_BEST_MODES);
1719
1720 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1721 memcpy(
1722 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1723 &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
1724 NUM_BEST_MODES * sizeof(WORD32));
1725 }
1726
1727 child_cost[j] =
1728 MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
1729 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
1730
1731 child_cost_least += child_cost[j];
1732
1733 /* Select the best mode to be populated as top and left nbr depending on the
1734 4tu and 1tu cost */
1735 if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
1736 ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
1737 {
1738 ps_cu_node->ps_sub_cu[j]->best_mode =
1739 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
1740 }
1741 else
1742 {
1743 ps_cu_node->ps_sub_cu[j]->best_mode =
1744 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
1745 }
1746 { /* Update the CTB nodes only for MAX - 1 CU nodes */
1747 WORD32 xA, yA, row, col;
1748 xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
1749 yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
1750 size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
1751 for(row = yA; row < (yA + size); row++)
1752 {
1753 for(col = xA; col < (xA + size); col++)
1754 {
1755 ps_ctxt->au1_ctb_mode_map[row][col] =
1756 ps_cu_node->ps_sub_cu[j]->best_mode;
1757 }
1758 }
1759 }
1760
1761 /*collect individual child satd for final SATD/qp accum*/
1762 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
1763
1764 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1765
1766 /* store the child 8x8 costs */
1767 pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] =
1768 child_cost[j];
1769
1770 /* set the CU valid flag */
1771 ps_intra8_analyse->b1_valid_cu = 1;
1772 ps_intra8_analyse->b1_enable_nxn = 0;
1773
1774 /* storing the modes to intra8 analyse */
1775
1776 /* store the best 8x8 modes 8x8 tu */
1777 memcpy(
1778 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
1779 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
1780 sizeof(UWORD8) * (NUM_BEST_MODES));
1781 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1782
1783 /* store the best 8x8 modes 4x4 tu */
1784 memcpy(
1785 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
1786 &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
1787 sizeof(UWORD8) * (NUM_BEST_MODES));
1788 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
1789
1790 /* NXN modes not evaluated hence set to 255 */
1791 memset(
1792 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1793 255,
1794 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1795 }
1796
1797 ihevce_set_nbr_map(
1798 ps_ctxt->pu1_ctb_nbr_map,
1799 ps_ctxt->i4_nbr_map_strd,
1800 ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
1801 ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
1802 (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
1803 0);
1804 }
1805 #if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
1806 else
1807 {
1808 for(j = 0; j < 4; j++)
1809 {
1810 intra8_analyse_t *ps_intra8_analyse;
1811 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
1812 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
1813 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
1814 /* NXN modes not evaluated hence set to 255 */
1815 memset(
1816 &ps_intra8_analyse->au1_4x4_best_modes[0][0],
1817 255,
1818 sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
1819
1820 ps_intra8_analyse->b1_valid_cu = 0;
1821 ps_intra8_analyse->b1_enable_nxn = 0;
1822 }
1823 child_cost_least = MAX_INTRA_COST_IPE;
1824 }
1825 #endif
1826 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
1827 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
1828
1829 ps_cu_node->ps_parent->u1_cu_size = 16;
1830 ps_cu_node->ps_parent->u2_x0 =
1831 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
1832 ps_cu_node->ps_parent->u2_y0 =
1833 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
1834
1835 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
1836
1837 /* Eval for TUSize = CuSize */
1838 ihevce_mode_eval_filtering(
1839 ps_cu_node->ps_parent,
1840 ps_cu_node,
1841 ps_ctxt,
1842 ps_curr_src,
1843 26,
1844 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1845 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1846 step2_bypass,
1847 1);
1848
1849 if(i4_enable_1cu_4tu)
1850 {
1851 /* Eval for TUSize = CuSize/2 */
1852 ihevce_mode_eval_filtering(
1853 ps_cu_node->ps_parent,
1854 ps_cu_node,
1855 ps_ctxt,
1856 ps_curr_src,
1857 26,
1858 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1859 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1860 step2_bypass,
1861 0);
1862 }
1863 else
1864 {
1865 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
1866 memcpy(
1867 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1868 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1869 NUM_BEST_MODES);
1870
1871 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
1872 memcpy(
1873 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1874 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
1875 NUM_BEST_MODES * sizeof(WORD32));
1876 }
1877
1878 ps_ctxt->u1_disable_child_cu_decide = 0;
1879 step2_bypass = 1;
1880
1881 /* Update parent cost */
1882 parent_cost =
1883 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
1884 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
1885
1886 /* Select the best mode to be populated as top and left nbr depending on the
1887 4tu and 1tu cost */
1888 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
1889 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
1890 {
1891 ps_cu_node->ps_parent->best_mode =
1892 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
1893 }
1894 else
1895 {
1896 ps_cu_node->ps_parent->best_mode =
1897 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
1898 }
1899
1900 /* store the 16x16 cost */
1901 *pi4_intra_16_cost = parent_cost;
1902
1903 /* accumulate the 32x32 cost */
1904 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
1905 {
1906 *pi4_intra_32_cost = parent_cost;
1907 }
1908 else
1909 {
1910 *pi4_intra_32_cost += parent_cost;
1911 }
1912
1913 /* set the CU valid flag */
1914 ps_intra16_analyse->b1_valid_cu = 1;
1915
1916 /* storing the modes to intra 16 analyse */
1917 {
1918 /* store the best 16x16 modes 16x16 tu */
1919 memcpy(
1920 &ps_intra16_analyse->au1_best_modes_16x16_tu[0],
1921 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
1922 sizeof(UWORD8) * NUM_BEST_MODES);
1923 ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
1924
1925 /* store the best 16x16 modes 8x8 tu */
1926 memcpy(
1927 &ps_intra16_analyse->au1_best_modes_8x8_tu[0],
1928 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
1929 sizeof(UWORD8) * NUM_BEST_MODES);
1930 ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
1931 }
1932
1933 parent_best_mode = ps_cu_node->ps_parent->best_mode;
1934 if(parent_cost <=
1935 child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
1936 LAMBDA_Q_SHIFT)) //|| identical_modes)
1937 {
1938 WORD32 i4_q_scale_q3_mod;
1939 WORD8 i1_cu_possible_qp;
1940 WORD32 i4_act_factor;
1941 //choose parent CU
1942
1943 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
1944
1945 /* set the 16x16 non split flag */
1946 ps_intra16_analyse->b1_split_flag = 0;
1947
1948 /*As 16*16 has won, pick L1 8x8 qp which maps
1949 to L0 16x16 Qp*/
1950 ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
1951 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
1952 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1953 ps_ctxt->i4_qscale,
1954 ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
1955 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
1956 f_strength,
1957 &i4_act_factor,
1958 &i4_q_scale_q3_mod,
1959 ps_ctxt->ps_rc_quant_ctxt);
1960
1961 /* cost accumalation of best cu size candiate */
1962 i8_frame_acc_satd_cost += parent_cost;
1963
1964 /* satd and mpm bits accumalation of best cu size candiate */
1965 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
1966
1967 /*satd/mod_qp accumulation of best cu */
1968 i8_frame_acc_satd_by_modqp_q10 +=
1969 ((LWORD64)ps_cu_node->ps_parent->best_satd
1970 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
1971 i4_q_scale_q3_mod;
1972
1973 /* Accumalate mode bits for all child blocks */
1974 i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
1975
1976 blk_cnt += 4;
1977 ps_ed_blk_l1 += 4;
1978 //ps_row_cu++;
1979 }
1980 else
1981 {
1982 //choose child CU
1983 WORD8 i1_cu_possible_qp;
1984 WORD32 i4_act_factor;
1985 WORD32 i4_q_scale_q3_mod;
1986
1987 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
1988 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2);
1989 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
1990 ps_ctxt->i4_qscale,
1991 ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1],
1992 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
1993 f_strength,
1994 &i4_act_factor,
1995 &i4_q_scale_q3_mod,
1996 ps_ctxt->ps_rc_quant_ctxt);
1997
1998 /* set the 16x16 split flag */
1999 ps_intra16_analyse->b1_split_flag = 1;
2000
2001 for(j = 0; j < 4; j++)
2002 {
2003 ihevce_update_cand_list(
2004 ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
2005
2006 if((IHEVCE_QUALITY_P3 > i4_quality_preset))
2007 {
2008 WORD32 k;
2009 intra8_analyse_t *ps_intra8_analyse;
2010 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
2011
2012 for(k = 0; k < 4; k++)
2013 {
2014 /* Populate best 3 nxn modes */
2015 ps_intra8_analyse->au1_4x4_best_modes[k][0] =
2016 ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
2017 ps_intra8_analyse->au1_4x4_best_modes[k][1] =
2018 ps_cu_node->ps_sub_cu[j]
2019 ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
2020 ps_intra8_analyse->au1_4x4_best_modes[k][2] =
2021 ps_cu_node->ps_sub_cu[j]
2022 ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
2023 ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255;
2024 }
2025 }
2026 /*accum satd/qp for all child block*/
2027 i8_frame_acc_satd_by_modqp_q10 +=
2028 ((LWORD64)child_satd[j]
2029 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2030 i4_q_scale_q3_mod;
2031
2032 /* Accumalate mode bits for all child blocks */
2033 i8_frame_acc_mode_bits_cost +=
2034 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2035
2036 /* satd and mpm bits accumalation of best cu size candiate */
2037 i4_ctb_acc_satd += child_satd[j];
2038
2039 blk_cnt += 1;
2040 ps_ed_blk_l1 += 1;
2041 //ps_row_cu++;
2042 }
2043
2044 /* cost accumalation of best cu size candiate */
2045 i8_frame_acc_satd_cost += child_cost_least;
2046 }
2047
2048 } //else of EIID
2049 #endif
2050 } // if(merge_16x16_l1)
2051 /* MAX CU SIZE 8x8 */
2052 else
2053 {
2054 #if IP_DBG_L1_l2
2055 for(i = 0; i < 4; i++)
2056 {
2057 ps_cu_node->ps_parent->u1_cu_size = 8;
2058 ps_cu_node->ps_parent->u2_x0 =
2059 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2060 ps_cu_node->ps_parent->u2_y0 =
2061 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2062 ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2063
2064 ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2065 blk_cnt++;
2066 ps_ed_blk_l1++;
2067 ps_row_cu++;
2068 merge_64x64 = 0;
2069 }
2070 #else
2071
2072 /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */
2073 if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
2074 {
2075 WORD32 i4_q_scale_q3_mod;
2076 WORD8 i1_cu_possible_qp;
2077 WORD32 i4_act_factor;
2078
2079 merge_64x64 = 0;
2080
2081 ps_intra32_analyse->b1_merge_flag = 0;
2082
2083 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
2084 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255;
2085 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2086
2087 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
2088 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255;
2089 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2090 ps_intra16_analyse->b1_split_flag = 1;
2091 ps_intra16_analyse->b1_valid_cu = 0;
2092 ps_intra16_analyse->b1_merge_flag = 0;
2093
2094 for(i = 0; i < 4; i++)
2095 {
2096 intra8_analyse_t *ps_intra8_analyse;
2097 WORD32 ctr_sub_cu;
2098
2099 cu_pos_x = gau1_cu_pos_x[blk_cnt];
2100 cu_pos_y = gau1_cu_pos_y[blk_cnt];
2101
2102 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2103 {
2104 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2105
2106 ps_intra8_analyse->b1_valid_cu = 0;
2107 ps_intra8_analyse->b1_enable_nxn = 0;
2108 ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255;
2109 ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255;
2110 ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255;
2111 ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255;
2112 ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
2113 ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
2114
2115 ps_cu_node->ps_parent->u1_cu_size = 8;
2116 ps_cu_node->ps_parent->u2_x0 =
2117 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2118 ps_cu_node->ps_parent->u2_y0 =
2119 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2120 ps_cu_node->ps_parent->best_mode =
2121 INTRA_DC; //ps_ed_blk_l1->best_mode;
2122
2123 /* fill in the first modes as invalid */
2124
2125 ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
2126 ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
2127 INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
2128 ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
2129
2130 ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
2131 ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
2132 ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
2133
2134 ihevce_update_cand_list(
2135 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2136
2137 //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
2138 //ps_row_cu->u1_num_intra_rdopt_cands = 0;
2139
2140 for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++)
2141 {
2142 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] =
2143 INTRA_DC;
2144 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] =
2145 INTRA_DC;
2146 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] =
2147 MAX_INTRA_COST_IPE;
2148
2149 ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] =
2150 MAX_INTRA_COST_IPE;
2151 ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost =
2152 MAX_INTRA_COST_IPE;
2153 }
2154
2155 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2156 MAX_INTRA_COST_IPE;
2157
2158 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2159 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2160 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2161 ps_ctxt->i4_qscale,
2162 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2163 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2164 f_strength,
2165 &i4_act_factor,
2166 &i4_q_scale_q3_mod,
2167 ps_ctxt->ps_rc_quant_ctxt);
2168
2169 /* set neighbours even if intra is not evaluated, since source is always available. */
2170 ihevce_set_nbr_map(
2171 ps_ctxt->pu1_ctb_nbr_map,
2172 ps_ctxt->i4_nbr_map_strd,
2173 ps_cu_node->ps_parent->u2_x0 << 1,
2174 ps_cu_node->ps_parent->u2_y0 << 1,
2175 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2176 1);
2177
2178 //ps_row_cu++;
2179 }
2180 blk_cnt++;
2181 ps_ed_blk_l1++;
2182 }
2183 }
2184 else
2185 {
2186 //cu_intra_cand_t *ps_cu_intra_cand;
2187 WORD8 i1_cu_possible_qp;
2188 WORD32 i4_act_factor;
2189 WORD32 i4_q_scale_q3_mod;
2190
2191 ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
2192 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
2193 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2194 ps_ctxt->i4_qscale,
2195 ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
2196 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2197 f_strength,
2198 &i4_act_factor,
2199 &i4_q_scale_q3_mod,
2200 ps_ctxt->ps_rc_quant_ctxt);
2201
2202 /* 64x64 merge is not possible */
2203 merge_64x64 = 0;
2204
2205 ps_intra32_analyse->b1_merge_flag = 0;
2206
2207 ps_intra16_analyse->b1_merge_flag = 0;
2208
2209 /* by default 16x16 modes are set to default values DC and Planar */
2210 ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0;
2211 ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1;
2212 ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
2213
2214 ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0;
2215 ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1;
2216 ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
2217 ps_intra16_analyse->b1_split_flag = 1;
2218 ps_intra16_analyse->b1_valid_cu = 1;
2219
2220 for(i = 0; i < 4; i++)
2221 {
2222 intra8_analyse_t *ps_intra8_analyse;
2223 cu_pos_x = gau1_cu_pos_x[blk_cnt];
2224 cu_pos_y = gau1_cu_pos_y[blk_cnt];
2225 if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
2226 {
2227 //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand;
2228 //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
2229
2230 //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
2231
2232 child_cost_least = 0;
2233
2234 ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
2235 ps_cu_node->ps_parent->u1_cu_size = 8;
2236 ps_cu_node->ps_parent->u2_x0 =
2237 gau1_cu_pos_x[blk_cnt]; /* Populate properly */
2238 ps_cu_node->ps_parent->u2_y0 =
2239 gau1_cu_pos_y[blk_cnt]; /* Populate properly */
2240
2241 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2242
2243 /*EARLY DECISION 8x8 block */
2244 ihevce_pu_calc_8x8_blk(
2245 ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector);
2246 for(j = 0; j < 4; j++)
2247 {
2248 child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost;
2249 child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
2250 }
2251
2252 /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */
2253 if(0 == ps_ctxt->u1_disable_child_cu_decide)
2254 {
2255 ihevce_set_nbr_map(
2256 ps_ctxt->pu1_ctb_nbr_map,
2257 ps_ctxt->i4_nbr_map_strd,
2258 ps_cu_node->ps_parent->u2_x0 << 1,
2259 ps_cu_node->ps_parent->u2_y0 << 1,
2260 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2261 0);
2262
2263 //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
2264
2265 /* Eval for TUSize = CuSize */
2266 ihevce_mode_eval_filtering(
2267 ps_cu_node->ps_parent,
2268 ps_cu_node,
2269 ps_ctxt,
2270 ps_curr_src,
2271 26,
2272 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2273 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2274 step2_bypass,
2275 1);
2276
2277 if(i4_enable_1cu_4tu)
2278 {
2279 /* Eval for TUSize = CuSize/2 */
2280 ihevce_mode_eval_filtering(
2281 ps_cu_node->ps_parent,
2282 ps_cu_node,
2283 ps_ctxt,
2284 ps_curr_src,
2285 26,
2286 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2287 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2288 step2_bypass,
2289 0);
2290 }
2291 else
2292 {
2293 /* 4TU not evaluated : 4tu modes set same as 1tu modes */
2294 memcpy(
2295 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2296 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2297 NUM_BEST_MODES);
2298
2299 /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
2300 memcpy(
2301 &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2302 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2303 NUM_BEST_MODES * sizeof(WORD32));
2304 }
2305
2306 /* Update parent cost */
2307 parent_cost =
2308 MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
2309 ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
2310
2311 /* Select the best mode to be populated as top and left nbr depending on the
2312 4tu and 1tu cost */
2313 if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
2314 ps_cu_node->ps_parent->au4_best_cost_1tu[0])
2315 {
2316 ps_cu_node->ps_parent->best_mode =
2317 ps_cu_node->ps_parent->au1_best_mode_1tu[0];
2318 }
2319 else
2320 {
2321 ps_cu_node->ps_parent->best_mode =
2322 ps_cu_node->ps_parent->au1_best_mode_4tu[0];
2323 }
2324 }
2325
2326 /* set the CU valid flag */
2327 ps_intra8_analyse->b1_valid_cu = 1;
2328 ps_intra8_analyse->b1_enable_nxn = 0;
2329
2330 /* storing the modes to intra 8 analyse */
2331
2332 /* store the best 8x8 modes 8x8 tu */
2333 memcpy(
2334 &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
2335 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2336 sizeof(UWORD8) * (NUM_BEST_MODES));
2337 ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
2338
2339 /* store the best 8x8 modes 4x4 tu */
2340 memcpy(
2341 &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
2342 &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
2343 sizeof(UWORD8) * (NUM_BEST_MODES));
2344 ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
2345
2346 /*As 8*8 has won, pick L1 4x4 qp which is equal to
2347 L1 8x8 Qp*/
2348 //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp;
2349 //ps_row_cu->i4_act_factor[0][1] = i4_act_factor;
2350
2351 parent_best_mode = ps_cu_node->ps_parent->best_mode;
2352 if(parent_cost <=
2353 child_cost_least +
2354 (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT))
2355 {
2356 /*CU = 4TU */
2357 ihevce_update_cand_list(
2358 ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
2359
2360 /* store the child 8x8 costs */
2361 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2362 parent_cost;
2363
2364 /* cost accumalation of best cu size candiate */
2365 i8_frame_acc_satd_cost += parent_cost;
2366
2367 /*satd/mod_qp accumulation of best cu */
2368 i8_frame_acc_satd_by_modqp_q10 +=
2369 ((LWORD64)ps_cu_node->ps_parent->best_satd
2370 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2371 i4_q_scale_q3_mod;
2372
2373 /* Accumalate mode bits for all child blocks */
2374 i8_frame_acc_mode_bits_cost +=
2375 ps_cu_node->ps_parent->u2_mode_bits_cost;
2376
2377 /* satd and mpm bits accumalation of best cu size candiate */
2378 i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
2379
2380 /* accumulate the 16x16 cost*/
2381 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2382 {
2383 *pi4_intra_16_cost = parent_cost;
2384 }
2385 else
2386 {
2387 *pi4_intra_16_cost += parent_cost;
2388 }
2389
2390 /* accumulate the 32x32 cost*/
2391 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2392 {
2393 *pi4_intra_32_cost = parent_cost;
2394 }
2395 else
2396 {
2397 *pi4_intra_32_cost += parent_cost;
2398 }
2399 }
2400 else
2401 {
2402 /*CU = 4PU*/
2403 //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0;
2404 //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0;
2405 //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size;
2406
2407 /* store the child 8x8 costs woth 4x4 pu summed cost */
2408 pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
2409 (child_cost_least);
2410
2411 /* accumulate the 16x16 cost*/
2412 if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
2413 {
2414 *pi4_intra_16_cost = child_cost_least;
2415 }
2416 else
2417 {
2418 *pi4_intra_16_cost += child_cost_least;
2419 }
2420
2421 /* cost accumalation of best cu size candiate */
2422 i8_frame_acc_satd_cost += child_cost_least;
2423
2424 for(j = 0; j < 4; j++)
2425 {
2426 /*satd/qp accumualtion*/
2427 i8_frame_acc_satd_by_modqp_q10 +=
2428 ((LWORD64)child_satd[j]
2429 << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2430 i4_q_scale_q3_mod;
2431
2432 /* Accumalate mode bits for all child blocks */
2433 i8_frame_acc_mode_bits_cost +=
2434 ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
2435
2436 /* satd and mpm bits accumalation of best cu size candiate */
2437 i4_ctb_acc_satd += child_satd[j];
2438 }
2439
2440 /* accumulate the 32x32 cost*/
2441 if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
2442 {
2443 *pi4_intra_32_cost = child_cost_least;
2444 }
2445 else
2446 {
2447 *pi4_intra_32_cost += child_cost_least;
2448 }
2449
2450 ps_intra8_analyse->b1_enable_nxn = 1;
2451
2452 /* Insert the best 8x8 modes unconditionally */
2453
2454 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2455 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2456 size = ps_cu_node->u1_cu_size >> 2;
2457
2458 ps_ctxt->au1_ctb_mode_map[y][x] =
2459 ps_cu_node->ps_sub_cu[0]->best_mode;
2460 ps_ctxt->au1_ctb_mode_map[y][x + 1] =
2461 ps_cu_node->ps_sub_cu[1]->best_mode;
2462 ps_ctxt->au1_ctb_mode_map[y + 1][x] =
2463 ps_cu_node->ps_sub_cu[2]->best_mode;
2464 ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] =
2465 ps_cu_node->ps_sub_cu[3]->best_mode;
2466 }
2467 /* NXN mode population */
2468 for(j = 0; j < 4; j++)
2469 {
2470 cand_mode_list[0] =
2471 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
2472 cand_mode_list[1] =
2473 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1];
2474 cand_mode_list[2] =
2475 ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2];
2476
2477 if(1)
2478 {
2479 /* Populate best 3 nxn modes */
2480 ps_intra8_analyse->au1_4x4_best_modes[j][0] =
2481 cand_mode_list[0];
2482 ps_intra8_analyse->au1_4x4_best_modes[j][1] =
2483 cand_mode_list[1]; //(ps_ed + 1)->best_mode;
2484 ps_intra8_analyse->au1_4x4_best_modes[j][2] =
2485 cand_mode_list[2]; //(ps_ed + 2)->best_mode;
2486 ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
2487
2488 //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4);
2489 }
2490 /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */
2491
2492 else /* IHEVCE_QUALITY_P0 == i4_quality_preset */
2493 {
2494 /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/
2495 ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0;
2496 }
2497
2498 ps_intra8_analyse
2499 ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255;
2500 }
2501
2502 //ps_row_cu++;
2503 }
2504 else
2505 {
2506 /* For Incomplete CTB, 16x16 is not valid */
2507 ps_intra16_analyse->b1_valid_cu = 0;
2508 }
2509 blk_cnt++;
2510 ps_ed_blk_l1++;
2511 }
2512 //ps_ed_blk_l2 ++;
2513 } //else of EIID
2514 #endif
2515 }
2516 }
2517 else
2518 {
2519 /* For incomplete CTB, init valid CU to 0 */
2520 ps_ed_blk_l1++;
2521 ps_intra32_analyse->b1_valid_cu = 0;
2522 ps_intra16_analyse[0].b1_valid_cu = 0;
2523 blk_cnt++;
2524 merge_64x64 = 0;
2525 }
2526 } while(blk_cnt != MAX_CTB_SIZE);
2527 /* if 64x64 merge is possible then check for 32x32 having same best modes */
2528 if(1 == merge_64x64)
2529 {
2530 WORD32 act_mode = au1_best_32x32_modes[0];
2531
2532 ps_ed_blk_l2 = ps_ed_l2_ctb;
2533 best_mode = ps_ed_blk_l2->best_mode;
2534 merge_64x64 =
2535 ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) +
2536 (act_mode == au1_best_32x32_modes[2]) +
2537 (act_mode == au1_best_32x32_modes[3]) ==
2538 4);
2539 if(merge_64x64 == 1)
2540 best_mode = au1_best_32x32_modes[0];
2541 else
2542 best_mode = ps_ed_blk_l2->best_mode;
2543 /* All 32x32 costs are accumalated to 64x64 cost */
2544 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2545 for(i = 0; i < 4; i++)
2546 {
2547 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2548 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2549 }
2550
2551 /* If all modes of 32x32 block is not same */
2552 if(0 == merge_64x64)
2553 {
2554 /*Compute CHILD cost for 32x32 */
2555 WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] +
2556 au4_best_32x32_cost[2] + au4_best_32x32_cost[3];
2557 WORD32 cost = MAX_INTRA_COST_IPE;
2558
2559 WORD32 best_mode_temp = 0;
2560 /*Compute 64x64 cost for each mode of 32x32*/
2561 for(i = 0; i < 4; i++)
2562 {
2563 WORD32 mode = au1_best_32x32_modes[i];
2564 if(mode < 2)
2565 mode = 26;
2566 ps_cu_node->ps_parent->u1_cu_size = 64;
2567 ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */
2568 ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */
2569
2570 ihevce_set_nbr_map(
2571 ps_ctxt->pu1_ctb_nbr_map,
2572 ps_ctxt->i4_nbr_map_strd,
2573 (ps_cu_node->ps_parent->u2_x0 << 1),
2574 (ps_cu_node->ps_parent->u2_y0 << 1),
2575 (ps_cu_node->ps_parent->u1_cu_size >> 2),
2576 0);
2577
2578 ihevce_mode_eval_filtering(
2579 ps_cu_node->ps_parent,
2580 ps_cu_node,
2581 ps_ctxt,
2582 ps_curr_src,
2583 mode,
2584 &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
2585 &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
2586 !step2_bypass,
2587 0);
2588
2589 parent_cost = ps_cu_node->ps_parent->best_cost;
2590 if(cost > parent_cost)
2591 {
2592 cost = parent_cost;
2593 best_mode_temp = ps_cu_node->ps_parent->best_mode;
2594 }
2595 }
2596 if(cost < child_cost_64x64)
2597 {
2598 merge_64x64 = 1;
2599 best_mode = best_mode_temp;
2600
2601 /* Update 64x64 cost if CU 64x64 is chosen */
2602 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost;
2603
2604 /* Accumalate the least cost for CU 64x64 */
2605 i8_frame_acc_satd_cost = cost;
2606 i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost;
2607
2608 /* satd and mpm bits accumalation of best cu size candiate */
2609 i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd;
2610 }
2611 }
2612 }
2613
2614 if(merge_64x64)
2615 {
2616 WORD32 i, j;
2617 intra32_analyse_t *ps_intra32_analyse;
2618 intra16_analyse_t *ps_intra16_analyse;
2619 WORD32 row, col;
2620 WORD32 i4_q_scale_q3_mod;
2621 WORD8 i1_cu_possible_qp;
2622 WORD32 i4_act_factor;
2623 //ps_row_cu = ps_curr_cu;
2624 ps_ctb_out->u4_cu_split_flags = 0x0;
2625 ps_ed_blk_l1 = ps_ed_l1_ctb;
2626 ps_ed_blk_l2 = ps_ed_l2_ctb;
2627
2628 ps_l0_ipe_out_ctb->u1_split_flag = 0;
2629
2630 /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/
2631 for(i = 0; i < 4; i++)
2632 {
2633 /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
2634 /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
2635 ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i];
2636
2637 for(j = 0; j < 4; j++)
2638 {
2639 /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
2640 /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
2641 ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j];
2642 ps_intra16_analyse->b1_merge_flag = 0;
2643 }
2644 }
2645
2646 /* CU size 64x64 and fill the final cu params */
2647 //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0];
2648 //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0];
2649 //ps_row_cu->u1_cu_size = 64;
2650
2651 /* Candidate mode Update */
2652 cand_mode_list[0] = best_mode;
2653 if(cand_mode_list[0] > 1)
2654 {
2655 if(cand_mode_list[0] == 2)
2656 {
2657 cand_mode_list[1] = 34;
2658 cand_mode_list[2] = 3;
2659 }
2660 else if(cand_mode_list[0] == 34)
2661 {
2662 cand_mode_list[1] = 2;
2663 cand_mode_list[2] = 33;
2664 }
2665 else
2666 {
2667 cand_mode_list[1] = cand_mode_list[0] - 1;
2668 cand_mode_list[2] = cand_mode_list[0] + 1;
2669 }
2670 //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode;
2671 //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode;
2672 }
2673 else
2674 {
2675 cand_mode_list[0] = 0;
2676 cand_mode_list[1] = 1;
2677 cand_mode_list[2] = 26;
2678 //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode;
2679 }
2680
2681 /* All 32x32 costs are accumalated to 64x64 cost */
2682 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
2683 for(i = 0; i < 4; i++)
2684 {
2685 ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
2686 ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
2687 }
2688 /* by default 64x64 modes are set to default values DC and Planar */
2689 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0];
2690 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1];
2691 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2];
2692 ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255;
2693
2694 /* Update CTB mode map for the finalised CU */
2695 x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
2696 y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
2697 size = ps_cu_node->u1_cu_size >> 2;
2698
2699 for(row = y; row < (y + size); row++)
2700 {
2701 for(col = x; col < (x + size); col++)
2702 {
2703 ps_ctxt->au1_ctb_mode_map[row][col] = best_mode;
2704 }
2705 }
2706
2707 ihevce_set_nbr_map(
2708 ps_ctxt->pu1_ctb_nbr_map,
2709 ps_ctxt->i4_nbr_map_strd,
2710 (ps_cu_node->u2_x0 << 1),
2711 (ps_cu_node->u2_y0 << 1),
2712 (ps_cu_node->u1_cu_size >> 2),
2713 1);
2714
2715 /*As 64*64 has won, pick L1 32x32 qp*/
2716 //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6));
2717 //ASSERT((blk_cnt>>6) == 0);
2718 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2719 i1_cu_possible_qp = ihevce_cu_level_qp_mod(
2720 ps_ctxt->i4_qscale,
2721 ps_ed_ctb_l1->i4_32x32_satd[0][0],
2722 ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2723 f_strength,
2724 &i4_act_factor,
2725 &i4_q_scale_q3_mod,
2726 ps_ctxt->ps_rc_quant_ctxt);
2727
2728 i8_frame_acc_satd_by_modqp_q10 =
2729 (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
2730 i4_q_scale_q3_mod;
2731 /* Increment pointers */
2732 ps_ed_blk_l1 += 64;
2733 ps_ed_blk_l2 += 16;
2734 //ps_row_cu++;
2735 }
2736 }
2737
2738 //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu);
2739
2740 {
2741 WORD32 i4_i, i4_j;
2742 WORD32 dummy;
2743 WORD8 i1_cu_qp;
2744 (void)i1_cu_qp;
2745 /*MAM_VAR_L1*/
2746 for(i4_j = 0; i4_j < 2; i4_j++)
2747 {
2748 i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j];
2749 f_strength = ps_ctxt->f_strength;
2750
2751 //i4_mod_factor_num = 4;
2752
2753 ps_ed_blk_l1 = ps_ed_l1_ctb;
2754 ps_ed_blk_l2 = ps_ed_l2_ctb;
2755 //ps_row_cu = ps_curr_cu;
2756
2757 /*Valid only for complete CTB */
2758 if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt))
2759 {
2760 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
2761 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2);
2762 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2);
2763 ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2);
2764
2765 i1_cu_qp = ihevce_cu_level_qp_mod(
2766 ps_ctxt->i4_qscale,
2767 ps_ed_ctb_l1->i4_32x32_satd[0][0],
2768 ps_ctxt->ld_curr_frame_32x32_log_avg[0],
2769 f_strength,
2770 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j],
2771 &dummy,
2772 ps_ctxt->ps_rc_quant_ctxt);
2773
2774 i1_cu_qp = ihevce_cu_level_qp_mod(
2775 ps_ctxt->i4_qscale,
2776 ps_ed_ctb_l1->i4_32x32_satd[0][1],
2777 ps_ctxt->ld_curr_frame_32x32_log_avg[1],
2778 f_strength,
2779 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j],
2780 &dummy,
2781 ps_ctxt->ps_rc_quant_ctxt);
2782 i1_cu_qp = ihevce_cu_level_qp_mod(
2783 ps_ctxt->i4_qscale,
2784 ps_ed_ctb_l1->i4_32x32_satd[0][2],
2785 ps_ctxt->ld_curr_frame_32x32_log_avg[2],
2786 f_strength,
2787 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j],
2788 &dummy,
2789 ps_ctxt->ps_rc_quant_ctxt);
2790
2791 i1_cu_qp = ihevce_cu_level_qp_mod(
2792 ps_ctxt->i4_qscale,
2793 ps_ed_ctb_l1->i4_32x32_satd[0][3],
2794 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2795 f_strength,
2796 &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j],
2797 &dummy,
2798 ps_ctxt->ps_rc_quant_ctxt);
2799
2800 ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0);
2801 }
2802 else
2803 {
2804 ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024;
2805 ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024;
2806 ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024;
2807 ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024;
2808 }
2809
2810 /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction
2811 for the usage by ME*/
2812
2813 {
2814 WORD32 pos_x_32, pos_y_32, pos;
2815 //WORD32 i4_incomplete_ctb_val_8;
2816 pos_x_32 = u1_curr_ctb_wdt / 16;
2817 pos_y_32 = u1_curr_ctb_hgt / 16;
2818
2819 pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32;
2820
2821 for(i4_i = 0; i4_i < 4; i4_i++)
2822 {
2823 if(i4_i < pos)
2824 {
2825 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2);
2826 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2);
2827 ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2);
2828 i1_cu_qp = ihevce_cu_level_qp_mod(
2829 ps_ctxt->i4_qscale,
2830 ps_ed_ctb_l1->i4_16x16_satd[i4_i][0],
2831 ps_ctxt->ld_curr_frame_16x16_log_avg[0],
2832 f_strength,
2833 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j],
2834 &dummy,
2835 ps_ctxt->ps_rc_quant_ctxt);
2836 i1_cu_qp = ihevce_cu_level_qp_mod(
2837 ps_ctxt->i4_qscale,
2838 ps_ed_ctb_l1->i4_16x16_satd[i4_i][1],
2839 ps_ctxt->ld_curr_frame_16x16_log_avg[1],
2840 f_strength,
2841 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j],
2842 &dummy,
2843 ps_ctxt->ps_rc_quant_ctxt);
2844 i1_cu_qp = ihevce_cu_level_qp_mod(
2845 ps_ctxt->i4_qscale,
2846 ps_ed_ctb_l1->i4_16x16_satd[i4_i][2],
2847 ps_ctxt->ld_curr_frame_16x16_log_avg[2],
2848 f_strength,
2849 &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j],
2850 &dummy,
2851 ps_ctxt->ps_rc_quant_ctxt);
2852 }
2853 else
2854 {
2855 /*For incomplete CTB */
2856 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024;
2857 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024;
2858 ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024;
2859 }
2860 }
2861 }
2862
2863 /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction
2864 for the usage by ME*/
2865 {
2866 WORD32 pos_x_16, pos_y_16, pos;
2867 //WORD32 i4_incomplete_ctb_val_8;
2868 pos_x_16 = u1_curr_ctb_wdt / 4;
2869 pos_y_16 = u1_curr_ctb_hgt / 4;
2870
2871 pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16;
2872 for(i4_i = 0; i4_i < 16; i4_i++)
2873 {
2874 if(i4_i < pos)
2875 {
2876 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2);
2877 ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2);
2878 i1_cu_qp = ihevce_cu_level_qp_mod(
2879 ps_ctxt->i4_qscale,
2880 ps_ed_ctb_l1->i4_8x8_satd[i4_i][0],
2881 ps_ctxt->ld_curr_frame_8x8_log_avg[0],
2882 f_strength,
2883 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j],
2884 &dummy,
2885 ps_ctxt->ps_rc_quant_ctxt);
2886 i1_cu_qp = ihevce_cu_level_qp_mod(
2887 ps_ctxt->i4_qscale,
2888 ps_ed_ctb_l1->i4_8x8_satd[i4_i][1],
2889 ps_ctxt->ld_curr_frame_8x8_log_avg[1],
2890 f_strength,
2891 &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j],
2892 &dummy,
2893 ps_ctxt->ps_rc_quant_ctxt);
2894 }
2895 else
2896 {
2897 /*For incomplete CTB */
2898 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024;
2899 ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024;
2900 }
2901 }
2902 }
2903 } //for loop
2904
2905 /* Accumalate the cost of ctb to the total cost */
2906 ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost;
2907 ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10;
2908
2909 ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost;
2910
2911 /* satd and mpm bits accumalation of best cu size candiate for the ctb */
2912 ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd;
2913 ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost;
2914
2915 ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd;
2916 }
2917
2918 {
2919 WORD32 ctr_8x8;
2920 for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
2921 {
2922 /*Accumalate activity factor for Intra and Inter*/
2923 if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] <
2924 ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8])
2925 {
2926 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2927 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2928 }
2929 else
2930 {
2931 ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
2932 ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
2933 }
2934
2935 /*Accumalate activity factor at frame level*/
2936 ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8];
2937 }
2938 }
2939 return;
2940 }
2941
/*!
******************************************************************************
* \if Function name : ihevce_nxn_sad_computer \endif
*
* \brief
*    Sum of absolute differences between two square 8-bit blocks
*
* \param[in] pu1_inp       : pointer to the top-left sample of the input block
* \param[in] i4_inp_stride : distance in samples between input rows
* \param[in] pu1_ref       : pointer to the top-left sample of the reference block
* \param[in] i4_ref_stride : distance in samples between reference rows
* \param[in] trans_size    : width and height of the block in samples
*
* \return
*    Accumulated absolute difference over trans_size x trans_size samples
*    (0 when trans_size is not positive)
*
*****************************************************************************
*/
WORD32 ihevce_nxn_sad_computer(
    UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size)
{
    WORD32 i4_rows_left;
    WORD32 i4_col;
    WORD32 i4_total = 0;

    /* Walk the block one row at a time, stepping both pointers by their stride */
    for(i4_rows_left = trans_size; i4_rows_left > 0; i4_rows_left--)
    {
        for(i4_col = 0; i4_col < trans_size; i4_col++)
        {
            WORD32 i4_diff = (WORD32)pu1_inp[i4_col] - (WORD32)pu1_ref[i4_col];

            i4_total += (i4_diff < 0) ? -i4_diff : i4_diff;
        }

        pu1_inp += i4_inp_stride;
        pu1_ref += i4_ref_stride;
    }

    return i4_total;
}
2963
2964 /*!
2965 ******************************************************************************
2966 * \if Function name : ihevce_mode_eval_filtering \endif
2967 *
2968 * \brief
2969 * Evaluates best 3 modes for the given CU size with probable modes from,
2970 * early decision structure, mpm candidates and dc, planar mode
2971 *
2972 * \param[in] ps_cu_node : pointer to MAX cu node info buffer
2973 * \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer
2974 * \param[in] ps_ctxt : pointer to IPE context struct
2975 * \param[in] ps_curr_src : pointer to src pixels struct
2976 * \param[in] best_amode : best angular mode from l1 layer or
2977 from (MAX - 1) CU mode
2978 * \param[in] best_costs_4x4 : pointer to 3 best cost buffer
2979 * \param[in] best_modes_4x4 : pointer to 3 best mode buffer
2980 * \param[in] step2_bypass : if 0, (MAX - 1) CU is evaluated
2981 * if 1, (MAX CU) sugested is evaluated
2982 * \param[in] tu_eq_cu : indicates if tu size is same as cu or cu/2
2983 *
2984 * \return
2985 * None
2986 *
2987 * \author
2988 * Ittiam
2989 *
2990 *****************************************************************************
2991 */
ihevce_mode_eval_filtering(ihevce_ipe_cu_tree_t * ps_cu_node,ihevce_ipe_cu_tree_t * ps_child_cu_node,ihevce_ipe_ctxt_t * ps_ctxt,iv_enc_yuv_buf_t * ps_curr_src,WORD32 best_amode,WORD32 * best_costs_4x4,UWORD8 * best_modes_4x4,WORD32 step2_bypass,WORD32 tu_eq_cu)2992 void ihevce_mode_eval_filtering(
2993 ihevce_ipe_cu_tree_t *ps_cu_node,
2994 ihevce_ipe_cu_tree_t *ps_child_cu_node,
2995 ihevce_ipe_ctxt_t *ps_ctxt,
2996 iv_enc_yuv_buf_t *ps_curr_src,
2997 WORD32 best_amode,
2998 WORD32 *best_costs_4x4,
2999 UWORD8 *best_modes_4x4,
3000 WORD32 step2_bypass,
3001 WORD32 tu_eq_cu)
3002 {
3003 UWORD8 *pu1_origin, *pu1_orig;
3004 WORD32 src_strd = ps_curr_src->i4_y_strd;
3005 WORD32 nbr_flags;
3006 nbr_avail_flags_t s_nbr;
3007 WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1;
3008 WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2;
3009 WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2;
3010 UWORD8 mode;
3011
3012 WORD32 cost_ang_mode = MAX_INTRA_COST_IPE;
3013 WORD32 filter_flag;
3014 WORD32 cost_amode_step2[7] = { 0 };
3015 /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */
3016 WORD32 sad = 0;
3017 WORD32 cu_pos_x, cu_pos_y;
3018 WORD32 temp;
3019 WORD32 i = 0, j, k, i_end, z;
3020 //WORD32 row, col, size;
3021 UWORD8 *pu1_ref;
3022 WORD32 xA, yA, xB, yB;
3023 WORD32 top_intra_mode;
3024 WORD32 left_intra_mode;
3025 UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3026 UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3027
3028 UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
3029 WORD32 count;
3030
3031 pf_ipe_res_trans_had apf_resd_trns_had[4];
3032
3033 WORD32 cand_mode_satd_list[3];
3034 ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
3035
3036 ihevc_intra_pred_luma_ref_substitution_fptr =
3037 ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
3038
3039 apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit;
3040 apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit;
3041 apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit;
3042 apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit;
3043
3044 /* initialize modes_to_eval as zero */
3045 memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES);
3046
3047 /* Compute the Parent Cost */
3048
3049 /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */
3050 pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) +
3051 (ps_cu_node->u2_x0 << 3);
3052
3053 /* Get position of CU within CTB at 4x4 granularity */
3054 cu_pos_x = ps_cu_node->u2_x0 << 1;
3055 cu_pos_y = ps_cu_node->u2_y0 << 1;
3056
3057 /* get the neighbour availability flags */
3058 ihevce_get_only_nbr_flag(
3059 &s_nbr,
3060 ps_ctxt->pu1_ctb_nbr_map,
3061 ps_ctxt->i4_nbr_map_strd,
3062 cu_pos_x,
3063 cu_pos_y,
3064 trans_size >> 2,
3065 trans_size >> 2);
3066
3067 /* Traverse for all 4 child blocks in the parent block */
3068 xA = (ps_cu_node->u2_x0 << 3) >> 2;
3069 yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
3070 xB = xA + 1;
3071 yB = yA - 1;
3072 left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
3073 top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
3074 /* call the function which populates sad cost for all the modes */
3075
3076 ihevce_intra_populate_mode_bits_cost_bracketing(
3077 top_intra_mode,
3078 left_intra_mode,
3079 s_nbr.u1_top_avail,
3080 s_nbr.u1_left_avail,
3081 ps_cu_node->u2_y0,
3082 &ps_ctxt->au2_mode_bits_satd_cost[0],
3083 &ps_ctxt->au2_mode_bits_satd[0],
3084 ps_ctxt->i4_ol_satd_lambda,
3085 cand_mode_satd_list);
3086
3087 for(k = 0; k < num_tu_in_y; k++)
3088 {
3089 for(j = 0; j < num_tu_in_x; j++)
3090 {
3091 /* get the neighbour availability flags */
3092 nbr_flags = ihevce_get_nbr_intra(
3093 &s_nbr,
3094 ps_ctxt->pu1_ctb_nbr_map,
3095 ps_ctxt->i4_nbr_map_strd,
3096 cu_pos_x + ((j) * (trans_size >> 2)),
3097 cu_pos_y + ((k) * (trans_size >> 2)),
3098 trans_size >> 2);
3099
3100 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3101
3102 /* Create reference samples array */
3103 ihevc_intra_pred_luma_ref_substitution_fptr(
3104 pu1_origin - src_strd - 1,
3105 pu1_origin - src_strd,
3106 pu1_origin - 1,
3107 src_strd,
3108 trans_size,
3109 nbr_flags,
3110 pu1_ref_orig,
3111 0);
3112
3113 /* Perform reference samples filtering */
3114 ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt);
3115
3116 ihevce_set_nbr_map(
3117 ps_ctxt->pu1_ctb_nbr_map,
3118 ps_ctxt->i4_nbr_map_strd,
3119 cu_pos_x + ((j) * (trans_size >> 2)),
3120 cu_pos_y + ((k) * (trans_size >> 2)),
3121 (trans_size >> 2),
3122 1);
3123
3124 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3125 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3126 }
3127 }
3128
3129 /* Revaluation for angular mode */
3130 //if(ps_ed_blk->ang_attr.mode_present == 1)
3131 //if(((best_amode & 0x1) != 1))
3132
3133 {
3134 WORD32 u1_trans_idx = trans_size >> 3;
3135 if(trans_size == 32)
3136 u1_trans_idx = 3;
3137 //best_amode = ps_ed_blk->ang_attr.best_mode;
3138
3139 i = 0;
3140 if(!step2_bypass)
3141 {
3142 /* Around best level 4 angular mode, search for best level 2 mode */
3143 ASSERT((best_amode >= 2) && (best_amode <= 34));
3144
3145 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3146 {
3147 if(best_amode >= 4)
3148 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2;
3149 }
3150
3151 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode;
3152
3153 if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
3154 {
3155 if(best_amode <= 32)
3156 ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2;
3157 }
3158 }
3159 else
3160 {
3161 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode;
3162 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode;
3163 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode;
3164 ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode;
3165 }
3166
3167 /* Add the left and top MPM modes for computation*/
3168
3169 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0];
3170 ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1];
3171
3172 i_end = i;
3173 count = 0;
3174
3175 /*Remove duplicate modes from modes_to_eval_temp[] */
3176 for(j = 0; j < i_end; j++)
3177 {
3178 for(k = 0; k < count; k++)
3179 {
3180 if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k])
3181 break;
3182 }
3183 if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1))
3184 {
3185 ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j];
3186 count++;
3187 }
3188 }
3189 i_end = count;
3190 if(count == 0)
3191 {
3192 ps_ctxt->au1_modes_to_eval[0] = 26;
3193 i_end = 1;
3194 }
3195
3196 for(i = 0; i < i_end; i++)
3197 {
3198 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3199 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3200
3201 mode = ps_ctxt->au1_modes_to_eval[i];
3202 ASSERT((mode >= 2) && (mode <= 34));
3203 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3204 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3205
3206 for(k = 0; k < num_tu_in_y; k++)
3207 {
3208 for(j = 0; j < num_tu_in_x; j++)
3209 {
3210 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3211
3212 if(0 == filter_flag)
3213 pu1_ref = pu1_ref_orig;
3214 else
3215 pu1_ref = pu1_ref_filt;
3216
3217 g_apf_lum_ip[g_i4_ip_funcs[mode]](
3218 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3219
3220 if(ps_ctxt->u1_use_satd)
3221 {
3222 sad = apf_resd_trns_had[u1_trans_idx](
3223 pu1_origin,
3224 ps_curr_src->i4_y_strd,
3225 &ps_ctxt->au1_pred_samples[0],
3226 trans_size,
3227 NULL,
3228 0
3229
3230 );
3231 }
3232 else
3233 {
3234 sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer(
3235 pu1_origin,
3236 ps_curr_src->i4_y_strd,
3237 &ps_ctxt->au1_pred_samples[0],
3238 trans_size,
3239 trans_size);
3240 }
3241
3242 cost_amode_step2[i] += sad;
3243
3244 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3245 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3246 }
3247 }
3248 }
3249 best_amode = ps_ctxt->au1_modes_to_eval[0];
3250 /*Init cost indx */
3251 cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0];
3252 for(z = 0; z < i_end; z++)
3253 {
3254 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3255 if(cost_ang_mode >= cost_amode_step2[z])
3256 {
3257 if(cost_ang_mode == cost_amode_step2[z])
3258 {
3259 if(best_amode > ps_ctxt->au1_modes_to_eval[z])
3260 best_amode = ps_ctxt->au1_modes_to_eval[z];
3261 }
3262 else
3263 {
3264 best_amode = ps_ctxt->au1_modes_to_eval[z];
3265 }
3266 cost_ang_mode = cost_amode_step2[z];
3267 }
3268 }
3269
3270 /*Modify mode bits for the angular modes */
3271 }
3272
3273 {
3274 /* Step - I modification */
3275 ASSERT((best_amode >= 2) && (best_amode <= 34));
3276 i_end = 0;
3277 z = 0;
3278
3279 /* Around best level 3 angular mode, search for best level 1 mode */
3280 ps_ctxt->au1_modes_to_eval[i_end++] = 0;
3281 ps_ctxt->au1_modes_to_eval[i_end++] = 1;
3282
3283 if(best_amode != 2)
3284 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1;
3285
3286 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode;
3287
3288 if(best_amode != 34)
3289 ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1;
3290
3291 /* Inserting step_2's best mode at last to avoid
3292 recalculation of it's SATD cost */
3293
3294 //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD
3295 //cost_amode_step2[i_end] = cost_ang_mode;
3296
3297 /*best_sad[i_end] = cost_ang_mode
3298 - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */
3299
3300 cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */
3301
3302 for(i = 0; i < i_end; i++)
3303 {
3304 WORD32 u1_trans_idx = trans_size >> 3;
3305 if(trans_size == 32)
3306 u1_trans_idx = 3;
3307 pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
3308 pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
3309
3310 /*best_sad[i] = 0; //See NOTE_A01 above */
3311 mode = ps_ctxt->au1_modes_to_eval[i];
3312 cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
3313 filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
3314
3315 for(k = 0; k < num_tu_in_y; k++)
3316 {
3317 for(j = 0; j < num_tu_in_x; j++)
3318 {
3319 pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
3320
3321 if(0 == filter_flag)
3322 pu1_ref = pu1_ref_orig;
3323 else
3324 pu1_ref = pu1_ref_filt;
3325
3326 g_apf_lum_ip[g_i4_ip_funcs[mode]](
3327 pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
3328
3329 //if(trans_size != 4)
3330 {
3331 sad = apf_resd_trns_had[u1_trans_idx](
3332 pu1_origin,
3333 ps_curr_src->i4_y_strd,
3334 &ps_ctxt->au1_pred_samples[0],
3335 trans_size,
3336 NULL,
3337 0);
3338 }
3339
3340 /*accumualting SATD though name says it is sad*/
3341 cost_amode_step2[i] += sad;
3342 /*best_sad[i] +=sad; //See NOTE_A01 above */
3343 pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
3344 pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
3345 }
3346 }
3347 }
3348 /* Updating i_end for the step_2's inserted mode*/
3349 // i_end++;
3350
3351 /* Arrange the reference array in ascending order */
3352
3353 for(i = 0; i < (i_end - 1); i++)
3354 {
3355 for(j = i + 1; j < i_end; j++)
3356 {
3357 if(cost_amode_step2[i] > cost_amode_step2[j])
3358 {
3359 temp = cost_amode_step2[i];
3360 cost_amode_step2[i] = cost_amode_step2[j];
3361 cost_amode_step2[j] = temp;
3362
3363 temp = modes_4x4[i];
3364 modes_4x4[i] = modes_4x4[j];
3365 modes_4x4[j] = temp;
3366 }
3367 }
3368 }
3369
3370 /* Least cost of all 3 angles are stored in cost_amode_step2[0] and corr. mode*/
3371 best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3372 cost_ang_mode = cost_amode_step2[0];
3373 ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode];
3374 ps_cu_node->best_cost = cost_amode_step2[0];
3375 ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
3376 ps_cu_node->best_satd =
3377 ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode];
3378
3379 /*Accumalate best mode bits cost for RC*/
3380 ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode];
3381
3382 /* Store the best three candidates */
3383 for(i = 0; i < 3; i++)
3384 {
3385 best_costs_4x4[i] = cost_amode_step2[i];
3386 best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]];
3387 }
3388 }
3389
3390 return;
3391 }
3392