1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_process.c
25 *
26 * @brief
27 * Contains functions for codec thread
28 *
29 * @author
30 * Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 * None
48 *
49 *******************************************************************************
50 */
51
52 /*****************************************************************************/
53 /* File Includes */
54 /*****************************************************************************/
55
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
120
121 /**
122 ******************************************************************************
123 *
124 * @brief This function generates sps, pps set on request
125 *
126 * @par Description
127 * When the encoder is set in header generation mode, the following function
128 * is called. This generates sps and pps headers and returns the control back
129 * to caller.
130 *
131 * @param[in] ps_codec
132 * pointer to codec context
133 *
134 * @return success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140 /* choose between ping-pong process buffer set */
141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142
143 /* entropy ctxt */
144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146 /* Bitstream structure */
147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149 /* sps */
150 sps_t *ps_sps = NULL;
151
152 /* pps */
153 pps_t *ps_pps = NULL;
154
155 /* output buff */
156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159 /********************************************************************/
160 /* initialize the bit stream buffer */
161 /********************************************************************/
162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164 /********************************************************************/
165 /* BEGIN HEADER GENERATION */
166 /********************************************************************/
167 /*ps_codec->i4_pps_id ++;*/
168 ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170 /*ps_codec->i4_sps_id ++;*/
171 ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173 /* populate sps header */
174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175 ih264e_populate_sps(ps_codec, ps_sps);
176
177 /* populate pps header */
178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179 ih264e_populate_pps(ps_codec, ps_pps);
180
181 ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183 /* generate sps */
184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
185 &ps_codec->s_cfg.s_vui);
186
187 /* generate pps */
188 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
189
190 /* queue output buffer */
191 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
192
193 return ps_entropy->i4_error_code;
194 }
195
196 /**
197 *******************************************************************************
198 *
199 * @brief initialize entropy context.
200 *
201 * @par Description:
202 * Before invoking the call to perform to entropy coding the entropy context
203 * associated with the job needs to be initialized. This involves the start
204 * mb address, end mb address, slice index and the pointer to location at
205 * which the mb residue info and mb header info are packed.
206 *
207 * @param[in] ps_proc
208 * Pointer to the current process context
209 *
210 * @returns error status
211 *
212 * @remarks none
213 *
214 *******************************************************************************
215 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
217 {
218 /* codec context */
219 codec_t *ps_codec = ps_proc->ps_codec;
220
221 /* entropy ctxt */
222 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
223
224 /* start address */
225 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
226
227 /* end address */
228 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
229
230 /* slice index */
231 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
232
233 /* sof */
234 /* @ start of frame or start of a new slice, set sof flag */
235 if (ps_entropy->i4_mb_start_add == 0)
236 {
237 ps_entropy->i4_sof = 1;
238 }
239
240 if (ps_entropy->i4_mb_x == 0)
241 {
242 /* packed mb coeff data */
243 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
244 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
245
246 /* packed mb header data */
247 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
248 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
249 }
250
251 return IH264E_SUCCESS;
252 }
253
254 /**
255 *******************************************************************************
256 *
257 * @brief entry point for entropy coding
258 *
259 * @par Description
260 * This function calls lower level functions to perform entropy coding for a
261 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes
262 * back the control, updates the ctxt and calls lower level functions again.
263 * This process is repeated till all the rows or group of mb's (which ever is
264 * minimum) are coded
265 *
266 * @param[in] ps_proc
267 * process context
268 *
269 * @returns error status
270 *
271 * @remarks
272 *
273 *******************************************************************************
274 */
275
ih264e_entropy(process_ctxt_t * ps_proc)276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
277 {
278 /* codec context */
279 codec_t *ps_codec = ps_proc->ps_codec;
280
281 /* entropy context */
282 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
283
284 /* cabac context */
285 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
286
287 /* sps */
288 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
289
290 /* pps */
291 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
292
293 /* slice header */
294 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
295
296 /* slice type */
297 WORD32 i4_slice_type = ps_proc->i4_slice_type;
298
299 /* Bitstream structure */
300 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
301
302 /* output buff */
303 out_buf_t s_out_buf;
304
305 /* proc map */
306 UWORD8 *pu1_proc_map;
307
308 /* entropy map */
309 UWORD8 *pu1_entropy_map_curr;
310
311 /* proc base idx */
312 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
313
314 /* temp var */
315 WORD32 i4_wd_mbs, i4_ht_mbs;
316 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
317 WORD32 bitstream_start_offset, bitstream_end_offset;
318 /********************************************************************/
319 /* BEGIN INIT */
320 /********************************************************************/
321
322 /* entropy encode start address */
323 u4_mb_idx = ps_entropy->i4_mb_start_add;
324
325 /* entropy encode end address */
326 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
327
328 /* width in mbs */
329 i4_wd_mbs = ps_entropy->i4_wd_mbs;
330
331 /* height in mbs */
332 i4_ht_mbs = ps_entropy->i4_ht_mbs;
333
334 /* total mb cnt */
335 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
336
337 /* proc map */
338 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
339
340 /* entropy map */
341 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
342
343 /********************************************************************/
344 /* @ start of frame / slice, */
345 /* initialize the output buffer, */
346 /* initialize the bit stream buffer, */
347 /* check if sps and pps headers have to be generated, */
348 /* populate and generate slice header */
349 /********************************************************************/
350 if (ps_entropy->i4_sof)
351 {
352 /********************************************************************/
353 /* initialize the output buffer */
354 /********************************************************************/
355 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
356
357 /* is last frame to encode */
358 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
359
360 /* frame idx */
361 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
362 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
363
364 /********************************************************************/
365 /* initialize the bit stream buffer */
366 /********************************************************************/
367 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
368
369 /********************************************************************/
370 /* BEGIN HEADER GENERATION */
371 /********************************************************************/
372 if (1 == ps_entropy->i4_gen_header)
373 {
374 /* generate sps */
375 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
376 &ps_codec->s_cfg.s_vui);
377 /* generate pps */
378 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
379
380 /* reset i4_gen_header */
381 ps_entropy->i4_gen_header = 0;
382 }
383
384 /* populate slice header */
385 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
386
387 /* generate slice header */
388 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
389 ps_pps, ps_sps);
390
391 /* once start of frame / slice is done, you can reset it */
392 /* it is the responsibility of the caller to set this flag */
393 ps_entropy->i4_sof = 0;
394
395 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
396 {
397 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
398 BITSTREAM_FLUSH(ps_bitstrm);
399 ih264e_init_cabac_ctxt(ps_entropy);
400 }
401 }
402
403 /* begin entropy coding for the mb set */
404 while (u4_mb_idx < u4_mb_end_idx)
405 {
406 /* init ptrs/indices */
407 if (ps_entropy->i4_mb_x == i4_wd_mbs)
408 {
409 ps_entropy->i4_mb_y++;
410 ps_entropy->i4_mb_x = 0;
411
412 /* packed mb coeff data */
413 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
414 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
415
416 /* packed mb header data */
417 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
418 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
419
420 /* proc map */
421 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
422
423 /* entropy map */
424 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
425 }
426
427 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
428 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
429 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
430
431 /* wait until the curr mb is core coded */
432 /* The wait for curr mb to be core coded is essential when entropy is launched
433 * as a separate job
434 */
435 while (1)
436 {
437 volatile UWORD8 *pu1_buf1;
438 WORD32 idx = ps_entropy->i4_mb_x;
439
440 pu1_buf1 = pu1_proc_map + idx;
441 if (*pu1_buf1)
442 break;
443 ithread_yield();
444 }
445
446
447 /* write mb layer */
448 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
449 /* Starting bitstream offset for header in bits */
450 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
451
452 /* set entropy map */
453 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
454
455 u4_mb_idx++;
456 ps_entropy->i4_mb_x++;
457 /* check for eof */
458 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
459 {
460 if (ps_entropy->i4_mb_x < i4_wd_mbs)
461 {
462 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
463 }
464 }
465
466 if (ps_entropy->i4_mb_x == i4_wd_mbs)
467 {
468 /* if slices are enabled */
469 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
470 {
471 /* current slice index */
472 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
473
474 /* slice map */
475 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
476
477 /* No need to open a slice at end of frame. The current slice can be closed at the time
478 * of signaling eof flag.
479 */
480 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
481 != pu1_slice_idx[u4_mb_idx]))
482 {
483 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
484 { /* mb skip run */
485 if ((i4_slice_type != ISLICE)
486 && *ps_entropy->pi4_mb_skip_run)
487 {
488 if (*ps_entropy->pi4_mb_skip_run)
489 {
490 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
491 *ps_entropy->pi4_mb_skip_run = 0;
492 }
493 }
494 /* put rbsp trailing bits for the previous slice */
495 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
496 }
497 else
498 {
499 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
500 }
501
502 /* update slice header pointer */
503 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
504 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
505 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
506
507 /* populate slice header */
508 ps_entropy->i4_mb_start_add = u4_mb_idx;
509 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
510 ps_sps);
511
512 /* generate slice header */
513 ps_entropy->i4_error_code |= ih264e_generate_slice_header(
514 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
515 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
516 {
517 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
518 BITSTREAM_FLUSH(ps_bitstrm);
519 ih264e_init_cabac_ctxt(ps_entropy);
520 }
521 }
522 else
523 {
524 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
525 && u4_mb_idx != u4_mb_cnt)
526 {
527 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528 }
529 }
530 }
531 /* Dont execute any further instructions until store synchronization took place */
532 DATA_SYNC();
533 }
534
535 /* Ending bitstream offset for header in bits */
536 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
537 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
538 bitstream_end_offset - bitstream_start_offset;
539 }
540
541 /* check for eof */
542 if (u4_mb_idx == u4_mb_cnt)
543 {
544 /* set end of frame flag */
545 ps_entropy->i4_eof = 1;
546 }
547 else
548 {
549 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
550 && ps_codec->s_cfg.e_slice_mode
551 != IVE_SLICE_MODE_BLOCKS)
552 {
553 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
554 }
555 }
556
557 if (ps_entropy->i4_eof)
558 {
559 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
560 {
561 /* mb skip run */
562 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
563 {
564 if (*ps_entropy->pi4_mb_skip_run)
565 {
566 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
567 ps_entropy->i4_error_code, "mb skip run");
568 *ps_entropy->pi4_mb_skip_run = 0;
569 }
570 }
571 /* put rbsp trailing bits */
572 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
573 }
574 else
575 {
576 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
577 }
578
579 /* update current frame stats to rc library */
580 {
581 /* number of bytes to stuff */
582 WORD32 i4_stuff_bytes;
583
584 /* update */
585 i4_stuff_bytes = ih264e_update_rc_post_enc(
586 ps_codec, ctxt_sel,
587 (ps_proc->ps_codec->i4_poc == 0));
588
589 /* cbr rc - house keeping */
590 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
591 {
592 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
593 }
594 else if (i4_stuff_bytes)
595 {
596 /* add filler nal units */
597 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
598 }
599 }
600
601 /*
602 *Frame number is to be incremented only if the current frame is a
603 * reference frame. After each successful frame encode, we increment
604 * frame number by 1
605 */
606 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
607 && ps_codec->u4_is_curr_frm_ref)
608 {
609 ps_codec->i4_frame_num++;
610 }
611 /********************************************************************/
612 /* signal the output */
613 /********************************************************************/
614 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
615 ps_entropy->ps_bitstrm->u4_strm_buf_offset;
616
617 DEBUG("entropy status %x", ps_entropy->i4_error_code);
618 }
619
620 /* allow threads to dequeue entropy jobs */
621 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
622
623 return ps_entropy->i4_error_code;
624 }
625
626 /**
627 *******************************************************************************
628 *
629 * @brief Packs header information of a mb in to a buffer
630 *
631 * @par Description:
632 * After the deciding the mode info of a macroblock, the syntax elements
633 * associated with the mb are packed and stored. The entropy thread unpacks
634 * this buffer and generates the end bit stream.
635 *
636 * @param[in] ps_proc
637 * Pointer to the current process context
638 *
639 * @returns error status
640 *
641 * @remarks none
642 *
643 *******************************************************************************
644 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
646 {
647 /* curr mb type */
648 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
649
650 /* pack mb syntax layer of curr mb (used for entropy coding) */
651 if (u4_mb_type == I4x4)
652 {
653 /* pointer to mb header storage space */
654 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
655 mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
656
657 /* temp var */
658 WORD32 i4, byte;
659
660 /* mb type plus mode */
661 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
662
663 /* cbp */
664 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
665
666 /* mb qp delta */
667 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
668
669 /* sub mb modes */
670 for (i4 = 0; i4 < 16; i4 ++)
671 {
672 byte = 0;
673
674 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
675 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
676 {
677 byte |= 1;
678 }
679 else
680 {
681
682 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
683 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
684 {
685 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
686 }
687 else
688 {
689 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
690 }
691 }
692
693 i4++;
694
695 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
696 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
697 {
698 byte |= 16;
699 }
700 else
701 {
702
703 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
704 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
705 {
706 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
707 }
708 else
709 {
710 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
711 }
712 }
713
714 ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] = byte;
715 }
716
717 /* end of mb layer */
718 pu1_ptr += sizeof(mb_hdr_i4x4_t);
719 ps_proc->pv_mb_header_data = pu1_ptr;
720 }
721 else if (u4_mb_type == I16x16)
722 {
723 /* pointer to mb header storage space */
724 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
725 mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
726
727 /* mb type plus mode */
728 ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
729
730 /* cbp */
731 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
732
733 /* mb qp delta */
734 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
735
736 /* end of mb layer */
737 pu1_ptr += sizeof(mb_hdr_i16x16_t);
738 ps_proc->pv_mb_header_data = pu1_ptr;
739 }
740 else if (u4_mb_type == P16x16)
741 {
742 /* pointer to mb header storage space */
743 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
744 mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
745
746 /* mb type */
747 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
748
749 /* cbp */
750 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
751
752 /* mb qp delta */
753 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
754
755 ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
756
757 ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
758
759 /* end of mb layer */
760 pu1_ptr += sizeof(mb_hdr_p16x16_t);
761 ps_proc->pv_mb_header_data = pu1_ptr;
762 }
763 else if (u4_mb_type == PSKIP)
764 {
765 /* pointer to mb header storage space */
766 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
767 mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
768
769 /* mb type */
770 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
771
772 /* end of mb layer */
773 pu1_ptr += sizeof(mb_hdr_pskip_t);
774 ps_proc->pv_mb_header_data = pu1_ptr;
775 }
776 else if(u4_mb_type == B16x16)
777 {
778
779 /* pointer to mb header storage space */
780 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
781 mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
782
783 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
784
785 /* mb type plus mode */
786 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
787
788 /* cbp */
789 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
790
791 /* mb qp delta */
792 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
793
794 /* l0 & l1 me data */
795 if (u4_pred_mode != PRED_L1)
796 {
797 ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
798 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
799
800 ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
801 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
802 }
803 if (u4_pred_mode != PRED_L0)
804 {
805 ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
806 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
807
808 ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
809 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
810 }
811
812 /* end of mb layer */
813 pu1_ptr += sizeof(mb_hdr_b16x16_t);
814 ps_proc->pv_mb_header_data = pu1_ptr;
815
816 }
817 else if(u4_mb_type == BDIRECT)
818 {
819 /* pointer to mb header storage space */
820 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
821 mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
822
823 /* mb type plus mode */
824 ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
825
826 /* cbp */
827 ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
828
829 /* mb qp delta */
830 ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
831
832 /* end of mb layer */
833 pu1_ptr += sizeof(mb_hdr_bdirect_t);
834 ps_proc->pv_mb_header_data = pu1_ptr;
835
836 }
837 else if(u4_mb_type == BSKIP)
838 {
839 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
840
841 /* pointer to mb header storage space */
842 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
843 mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
844
845 /* mb type plus mode */
846 ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
847
848 /* end of mb layer */
849 pu1_ptr += sizeof(mb_hdr_bskip_t);
850 ps_proc->pv_mb_header_data = pu1_ptr;
851 }
852
853 return IH264E_SUCCESS;
854 }
855
856 /**
857 *******************************************************************************
858 *
859 * @brief update process context after encoding an mb. This involves preserving
860 * the current mb information for later use, initialize the proc ctxt elements to
861 * encode next mb.
862 *
863 * @par Description:
864 * This function performs house keeping tasks after encoding an mb.
865 * After encoding an mb, various elements of the process context needs to be
866 * updated to encode the next mb. For instance, the source, recon and reference
867 * pointers, mb indices have to be adjusted to the next mb. The slice index of
868 * the current mb needs to be updated. If mb qp modulation is enabled, then if
869 * the qp changes the quant param structure needs to be updated. Also, to encode
870 * the next mb, the current mb info is used as part of mode prediction or mv
871 * prediction. Hence the current mb info has to be preserved at top/top left/left
872 * locations.
873 *
874 * @param[in] ps_proc
875 * Pointer to the current process context
876 *
877 * @returns error status
878 *
879 * @remarks none
880 *
881 *******************************************************************************
882 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)883 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
884 {
885 /* error status */
886 WORD32 error_status = IH264_SUCCESS;
887
888 /* codec context */
889 codec_t *ps_codec = ps_proc->ps_codec;
890
891 /* curr mb indices */
892 WORD32 i4_mb_x = ps_proc->i4_mb_x;
893 WORD32 i4_mb_y = ps_proc->i4_mb_y;
894
895 /* mb syntax elements of neighbors */
896 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
897 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
898 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
899
900 /* curr mb type */
901 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
902
903 /* curr mb type */
904 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
905
906 /* width in mbs */
907 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
908
909 /*height in mbs*/
910 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
911
912 /* proc map */
913 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
914
915 /* deblk context */
916 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
917
918 /* deblk bs context */
919 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
920
921 /* top row motion vector info */
922 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
923
924 /* top left mb motion vector */
925 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
926
927 /* left mb motion vector */
928 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
929
930 /* sub mb modes */
931 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
932
933 /*************************************************************/
934 /* During MV prediction, when top right mb is not available, */
935 /* top left mb info. is used for prediction. Hence the curr */
936 /* top, which will be top left for the next mb needs to be */
937 /* preserved before updating it with curr mb info. */
938 /*************************************************************/
939
940 /* mb type, mb class, csbp */
941 *ps_top_left_syn = *ps_top_syn;
942
943 if (ps_proc->i4_slice_type != ISLICE)
944 {
945 /*****************************************/
946 /* update top left with top info results */
947 /*****************************************/
948 /* mv */
949 *ps_top_left_mb_pu = *ps_top_row_pu;
950 }
951
952 /*************************************************/
953 /* update top and left with curr mb info results */
954 /*************************************************/
955
956 /* mb type */
957 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
958
959 /* mb class */
960 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
961
962 /* csbp */
963 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
964
965 /* distortion */
966 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
967
968 if (u4_is_intra)
969 {
970 /* mb / sub mb modes */
971 if (I16x16 == u4_mb_type)
972 {
973 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
974 }
975 else if (I4x4 == u4_mb_type)
976 {
977 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
978 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
979 }
980 else if (I8x8 == u4_mb_type)
981 {
982 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
983 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
984 }
985
986 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
987 {
988 /* mv */
989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990 }
991
992 *ps_proc->pu4_mb_pu_cnt = 1;
993 }
994 else
995 {
996 /* mv */
997 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
998 }
999
1000 /*
1001 * Mark that the MB has been coded intra
1002 * So that future AIRs can skip it
1003 */
1004 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1005
1006 /**************************************************/
1007 /* pack mb header info. for entropy coding */
1008 /**************************************************/
1009 ih264e_pack_header_data(ps_proc);
1010
1011 /* update previous mb qp */
1012 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1013
1014 /* store qp */
1015 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1016
1017 /*
1018 * We need to sync the cache to make sure that the nmv content of proc
1019 * is updated to cache properly
1020 */
1021 DATA_SYNC();
1022
1023 /* Just before finishing the row, enqueue the job in to entropy queue.
1024 * The master thread depending on its convenience shall dequeue it and
1025 * performs entropy.
1026 *
1027 * WARN !! Placing this block post proc map update can cause queuing of
1028 * entropy jobs in out of order.
1029 */
1030 if (i4_mb_x == i4_wd_mbs - 1)
1031 {
1032 /* job structures */
1033 job_t s_job;
1034
1035 /* job class */
1036 s_job.i4_cmd = CMD_ENTROPY;
1037
1038 /* number of mbs to be processed in the current job */
1039 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1040
1041 /* job start index x */
1042 s_job.i2_mb_x = 0;
1043
1044 /* job start index y */
1045 s_job.i2_mb_y = ps_proc->i4_mb_y;
1046
1047 /* proc base idx */
1048 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1049
1050 /* queue the job */
1051 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1052
1053 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1054 ih264_list_terminate(ps_codec->pv_entropy_jobq);
1055 }
1056
1057 /* update proc map */
1058 pu1_proc_map[i4_mb_x] = 1;
1059
1060 /**************************************************/
1061 /* update proc ctxt elements for encoding next mb */
1062 /**************************************************/
1063 /* update indices */
1064 i4_mb_x ++;
1065 ps_proc->i4_mb_x = i4_mb_x;
1066
1067 if (ps_proc->i4_mb_x == i4_wd_mbs)
1068 {
1069 ps_proc->i4_mb_y++;
1070 ps_proc->i4_mb_x = 0;
1071 }
1072
1073 /* update slice index */
1074 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1075
1076 /* update buffers pointers */
1077 ps_proc->pu1_src_buf_luma += MB_SIZE;
1078 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1079 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1080 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1081
1082 /*
1083 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1084 * the stride per MB is MB_SIZE
1085 */
1086 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1087 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1088 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1089 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1090
1091
1092
1093 /* Reset cost, distortion params */
1094 ps_proc->i4_mb_cost = INT_MAX;
1095 ps_proc->i4_mb_distortion = SHRT_MAX;
1096
1097 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1098
1099 ps_proc->pu4_mb_pu_cnt += 1;
1100
1101 /* Update colocated pu */
1102 if (ps_proc->i4_slice_type == BSLICE)
1103 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1104
1105 /* deblk ctxts */
1106 if (ps_proc->u4_disable_deblock_level != 1)
1107 {
1108 /* indices */
1109 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1110 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1111
1112 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1113 ps_deblk->i4_mb_x ++;
1114
1115 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1116 /*
1117 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1118 * the stride per MB is MB_SIZE
1119 */
1120 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1121 #endif
1122 }
1123
1124 return error_status;
1125 }
1126
1127 /**
1128 *******************************************************************************
1129 *
1130 * @brief initialize process context.
1131 *
1132 * @par Description:
1133 * Before dispatching the current job to process thread, the process context
1134 * associated with the job is initialized. Usually every job aims to encode one
1135 * row of mb's. Basing on the row indices provided by the job, the process
1136 * context's buffer ptrs, slice indices and other elements that are necessary
1137 * during core-coding are initialized.
1138 *
1139 * @param[in] ps_proc
1140 * Pointer to the current process context
1141 *
1142 * @returns error status
1143 *
1144 * @remarks none
1145 *
1146 *******************************************************************************
1147 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1148 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1149 {
1150 /* codec context */
1151 codec_t *ps_codec = ps_proc->ps_codec;
1152
1153 /* nmb processing context*/
1154 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1155
1156 /* indices */
1157 WORD32 i4_mb_x, i4_mb_y;
1158
1159 /* strides */
1160 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1161 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1162 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1163
1164 /* quant params */
1165 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1166
1167 /* deblk ctxt */
1168 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1169
1170 /* deblk bs context */
1171 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1172
1173 /* Pointer to mv_buffer of current frame */
1174 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1175
1176 /* Pointers for color space conversion */
1177 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1178
1179 /* Pad the MB to support non standard sizes */
1180 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1181 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1182 UWORD16 u2_num_rows = MB_SIZE;
1183 WORD32 convert_uv_only;
1184
1185 /********************************************************************/
1186 /* BEGIN INIT */
1187 /********************************************************************/
1188
1189 i4_mb_x = ps_proc->i4_mb_x;
1190 i4_mb_y = ps_proc->i4_mb_y;
1191
1192 /* Number of mbs processed in one loop of process function */
1193 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1194 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1195
1196 /* init buffer pointers */
1197 convert_uv_only = 1;
1198 if (u4_pad_bottom_sz || u4_pad_right_sz ||
1199 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1200 {
1201 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1202 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1203 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1204 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1205 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1206 convert_uv_only = 0;
1207 }
1208 else
1209 {
1210 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1211 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1212 }
1213
1214
1215 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1216 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1217 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1218 u4_pad_bottom_sz || u4_pad_right_sz)
1219 {
1220 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1221 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1222 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1223
1224 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1225 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1226 }
1227 else
1228 {
1229 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1230 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1231 }
1232
1233 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1234 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1235
1236 /* Tempral back and forward reference buffer */
1237 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1238 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1239 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1240 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1241
1242 /*
1243 * Do color space conversion
1244 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1245 */
1246 switch (ps_codec->s_cfg.e_inp_color_fmt)
1247 {
1248 case IV_YUV_420SP_UV:
1249 case IV_YUV_420SP_VU:
1250 /* In case of 420 semi-planar input, copy last few rows to intermediate
1251 buffer as chroma trans functions access one extra byte due to interleaved input.
1252 This data will be padded if required */
1253 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1254 {
1255 WORD32 num_rows = MB_SIZE;
1256 UWORD8 *pu1_src;
1257 UWORD8 *pu1_dst;
1258 WORD32 i;
1259 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1260 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1261
1262 pu1_dst = ps_proc->pu1_src_buf_luma;
1263
1264 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1265 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1266 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1267 num_rows = MB_SIZE - u4_pad_bottom_sz;
1268 for (i = 0; i < num_rows; i++)
1269 {
1270 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1271 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1272 pu1_dst += ps_proc->i4_src_strd;
1273 }
1274 }
1275 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1276 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1277 pu1_dst = ps_proc->pu1_src_buf_chroma;
1278
1279 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1280 * due to interleaved input
1281 */
1282 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1283 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1284 else
1285 num_rows = BLK8x8SIZE;
1286 for (i = 0; i < num_rows; i++)
1287 {
1288 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1289 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1290 pu1_dst += ps_proc->i4_src_chroma_strd;
1291 }
1292
1293 }
1294 break;
1295
1296 case IV_YUV_420P :
1297 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1298 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1299
1300 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1301 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1302
1303 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1305
1306 ps_codec->pf_ih264e_conv_420p_to_420sp(
1307 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1308 ps_proc->pu1_src_buf_luma,
1309 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1310 ps_codec->s_cfg.u4_disp_wd,
1311 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1312 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1313 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1314 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1315 convert_uv_only);
1316 break;
1317
1318 case IV_YUV_422ILE :
1319 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1320 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1321
1322 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1323 ps_proc->pu1_src_buf_luma,
1324 ps_proc->pu1_src_buf_chroma,
1325 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1326 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1327 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1328 ps_proc->i4_src_chroma_strd,
1329 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1330 break;
1331
1332 default:
1333 break;
1334 }
1335
1336 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1337 {
1338 UWORD32 u4_pad_wd, u4_pad_ht;
1339 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1340 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1341 u4_pad_ht = MB_SIZE;
1342 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1343 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1344
1345 ih264_pad_right_luma(
1346 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1347 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1348
1349 ih264_pad_right_chroma(
1350 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1351 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1352 }
1353
1354 /* pad bottom edge */
1355 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1356 {
1357 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1358 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1359
1360 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1361 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1362 }
1363
1364
1365 /* packed mb coeff data */
1366 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1367
1368 /* packed mb header data */
1369 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1370
1371 /* slice index */
1372 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1373
1374 /*********************************************************************/
1375 /* ih264e_init_quant_params() routine is called at the pic init level*/
1376 /* this would have initialized the qp. */
1377 /* TODO_LATER: currently it is assumed that quant params donot change*/
1378 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1379 /*********************************************************************/
1380
1381 /* init mv buffer ptr */
1382 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1383 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1384
1385 /* Init co-located mv buffer */
1386 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1387 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1388
1389 if (i4_mb_y == 0)
1390 {
1391 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1392 }
1393 else
1394 {
1395 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1396 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1397 }
1398
1399 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1400
1401 /* mb type */
1402 ps_proc->u4_mb_type = I16x16;
1403
1404 /* lambda */
1405 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1406
1407 /* mb distortion */
1408 ps_proc->i4_mb_distortion = SHRT_MAX;
1409
1410 if (i4_mb_x == 0)
1411 {
1412 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1413
1414 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1415
1416 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1417
1418 if (i4_mb_y == 0)
1419 {
1420 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1421 }
1422 }
1423
1424 /* mb cost */
1425 ps_proc->i4_mb_cost = INT_MAX;
1426
1427 /**********************/
1428 /* init deblk context */
1429 /**********************/
1430 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1431 /* deblk lags the current mb proc by 1 row */
1432 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1433 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1434 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1435 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1436
1437 /* buffer ptrs */
1438 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1439 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1440
1441 /* init deblk bs context */
1442 /* mb indices */
1443 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1444 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1445
1446 /* init n_mb_process context */
1447 ps_n_mb_ctxt->i4_mb_x = 0;
1448 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1449 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1450
1451 return IH264E_SUCCESS;
1452 }
1453
1454 /**
1455 *******************************************************************************
1456 *
1457 * @brief This function performs luma & chroma padding
1458 *
1459 * @par Description:
1460 *
1461 * @param[in] ps_proc
1462 * Process context corresponding to the job
1463 *
1464 * @param[in] pu1_curr_pic_luma
1465 * Pointer to luma buffer
1466 *
1467 * @param[in] pu1_curr_pic_chroma
1468 * Pointer to chroma buffer
1469 *
1470 * @param[in] i4_mb_x
1471 * mb index x
1472 *
1473 * @param[in] i4_mb_y
1474 * mb index y
1475 *
1476 * @param[in] i4_pad_ht
1477 * number of rows to be padded
1478 *
1479 * @returns error status
1480 *
1481 * @remarks none
1482 *
1483 *******************************************************************************
1484 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1485 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1486 UWORD8 *pu1_curr_pic_luma,
1487 UWORD8 *pu1_curr_pic_chroma,
1488 WORD32 i4_mb_x,
1489 WORD32 i4_mb_y,
1490 WORD32 i4_pad_ht)
1491 {
1492 /* codec context */
1493 codec_t *ps_codec = ps_proc->ps_codec;
1494
1495 /* strides */
1496 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1497
1498 if (i4_mb_x == 0)
1499 {
1500 /* padding left luma */
1501 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1502
1503 /* padding left chroma */
1504 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1505 }
1506 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1507 {
1508 /* padding right luma */
1509 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1510
1511 /* padding right chroma */
1512 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1513
1514 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1515 {
1516 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1517 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1518
1519 /* padding bottom luma */
1520 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1521
1522 /* padding bottom chroma */
1523 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1524 }
1525 }
1526
1527 if (i4_mb_y == 0)
1528 {
1529 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1530 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1531 WORD32 wd = MB_SIZE;
1532
1533 if (i4_mb_x == 0)
1534 {
1535 pu1_rec_luma -= PAD_LEFT;
1536 pu1_rec_chroma -= PAD_LEFT;
1537
1538 wd += PAD_LEFT;
1539 }
1540 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1541 {
1542 wd += PAD_RIGHT;
1543 }
1544
1545 /* padding top luma */
1546 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1547
1548 /* padding top chroma */
1549 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1550 }
1551
1552 return IH264E_SUCCESS;
1553 }
1554
1555
1556
1557
1558 /**
1559 *******************************************************************************
1560 *
1561 * @brief This function performs deblocking, padding and halfpel generation for
1562 * 'n' MBs
1563 *
1564 * @par Description:
1565 *
1566 * @param[in] ps_proc
1567 * Process context corresponding to the job
1568 *
1569 * @param[in] pu1_curr_pic_luma
1570 * Current MB being processed(Luma)
1571 *
1572 * @param[in] pu1_curr_pic_chroma
1573 * Current MB being processed(Chroma)
1574 *
1575 * @param[in] i4_mb_x
1576 * Column value of current MB processed
1577 *
1578 * @param[in] i4_mb_y
1579 * Curent row processed
1580 *
1581 * @returns error status
1582 *
1583 * @remarks none
1584 *
1585 *******************************************************************************
1586 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1587 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1588 UWORD8 *pu1_curr_pic_luma,
1589 UWORD8 *pu1_curr_pic_chroma,
1590 WORD32 i4_mb_x,
1591 WORD32 i4_mb_y)
1592 {
1593 /* codec context */
1594 codec_t *ps_codec = ps_proc->ps_codec;
1595
1596 /* n_mb processing context */
1597 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1598
1599 /* deblk context */
1600 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1601
1602 /* strides */
1603 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1604
1605 /* loop variables */
1606 WORD32 row, i, j, col;
1607
1608 /* Padding Width */
1609 UWORD32 u4_pad_wd;
1610
1611 /* deblk_map of the row being deblocked */
1612 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1613
1614 /* deblk_map_previous row */
1615 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1616
1617 WORD32 u4_pad_top = 0;
1618
1619 WORD32 u4_deblk_prev_row = 0;
1620
1621 /* Number of mbs to be processed */
1622 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1623
1624 /* Number of mbs actually processed
1625 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1626 WORD32 i4_n_mb_process_count = 0;
1627
1628 UWORD8 *pu1_pad_bottom_src = NULL;
1629
1630 UWORD8 *pu1_pad_src_luma = NULL;
1631 UWORD8 *pu1_pad_src_chroma = NULL;
1632
1633 if (ps_proc->u4_disable_deblock_level == 1)
1634 {
1635 /* If left most MB is processed, then pad left */
1636 if (i4_mb_x == 0)
1637 {
1638 /* padding left luma */
1639 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1640
1641 /* padding left chroma */
1642 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1643 }
1644 /*last col*/
1645 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1646 {
1647 /* padding right luma */
1648 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1649
1650 /* padding right chroma */
1651 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1652 }
1653 }
1654
1655 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1656 {
1657 /* if number of mb's to be processed are less than 'N', go back.
1658 * exception to the above clause is end of row */
1659 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1660 {
1661 return IH264E_SUCCESS;
1662 }
1663 else
1664 {
1665 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1666
1667 /* performing deblocking for required number of MBs */
1668 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1669 {
1670 u4_deblk_prev_row = 1;
1671
1672 /* checking whether the top rows are deblocked */
1673 for (col = 0; col < i4_n_mb_process_count; col++)
1674 {
1675 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1676 }
1677
1678 /* checking whether the top right MB is deblocked */
1679 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1680 {
1681 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1682 }
1683
1684 /* Top or Top right MBs not deblocked */
1685 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1686 {
1687 return IH264E_SUCCESS;
1688 }
1689
1690 for (row = 0; row < i4_n_mb_process_count; row++)
1691 {
1692 ih264e_deblock_mb(ps_proc, ps_deblk);
1693
1694 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1695
1696 if (ps_deblk->i4_mb_y > 0)
1697 {
1698 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1699 {
1700 /* padding left luma */
1701 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1702
1703 /* padding left chroma */
1704 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1705 }
1706
1707 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1708 {
1709 /* padding right luma */
1710 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1711
1712 /* padding right chroma */
1713 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1714 }
1715 }
1716 ps_deblk->i4_mb_x++;
1717
1718 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1719 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1720
1721 }
1722 }
1723 else if(i4_mb_y > 0)
1724 {
1725 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1726
1727 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1728 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1729 }
1730
1731 if (i4_mb_y == 2)
1732 {
1733 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1734 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1735
1736 if (ps_n_mb_ctxt->i4_mb_x == 0)
1737 {
1738 u4_pad_wd += PAD_LEFT;
1739 u4_pad_top = -PAD_LEFT;
1740 }
1741
1742 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1743 {
1744 u4_pad_wd += PAD_RIGHT;
1745 }
1746
1747 /* padding top luma */
1748 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1749
1750 /* padding top chroma */
1751 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1752 }
1753
1754 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1755
1756 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1757 {
1758 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1759 {
1760 /* Bottom Padding is done in one stretch for the entire width */
1761 if (ps_proc->u4_disable_deblock_level != 1)
1762 {
1763 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1764
1765 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1766
1767 ps_n_mb_ctxt->i4_mb_x = 0;
1768 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1769 ps_deblk->i4_mb_x = 0;
1770 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1771
1772 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1773 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1774
1775 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1776
1777 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1778
1779 for (i = 0; i < j; i++)
1780 {
1781 for (col = 0; col < i4_n_mbs; col++)
1782 {
1783 ih264e_deblock_mb(ps_proc, ps_deblk);
1784
1785 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1786
1787 ps_deblk->i4_mb_x++;
1788 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1789 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1790 ps_n_mb_ctxt->i4_mb_x++;
1791 }
1792 }
1793
1794 for (col = 0; col < i4_n_mb_process_count; col++)
1795 {
1796 ih264e_deblock_mb(ps_proc, ps_deblk);
1797
1798 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1799
1800 ps_deblk->i4_mb_x++;
1801 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1802 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1803 ps_n_mb_ctxt->i4_mb_x++;
1804 }
1805
1806 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1807
1808 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1809
1810 /* padding left luma */
1811 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1812
1813 /* padding left chroma */
1814 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1815
1816 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1817 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1818
1819 /* padding left luma */
1820 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1821
1822 /* padding left chroma */
1823 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1824
1825 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1826
1827 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1828
1829 /* padding right luma */
1830 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1831
1832 /* padding right chroma */
1833 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1834
1835 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1836 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1837
1838 /* padding right luma */
1839 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1840
1841 /* padding right chroma */
1842 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1843
1844 }
1845
1846 /* In case height is less than 2 MBs pad top */
1847 if (ps_proc->i4_ht_mbs <= 2)
1848 {
1849 UWORD8 *pu1_pad_top_src;
1850 /* padding top luma */
1851 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1852 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1853
1854 /* padding top chroma */
1855 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1856 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1857 }
1858
1859 /* padding bottom luma */
1860 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1861 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1862
1863 /* padding bottom chroma */
1864 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1865 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1866 }
1867 }
1868 }
1869 }
1870
1871 return IH264E_SUCCESS;
1872 }
1873
1874
1875 /**
1876 *******************************************************************************
1877 *
1878 * @brief This function performs luma & chroma core coding for a set of mb's.
1879 *
1880 * @par Description:
1881 * The mb to be coded is taken and is evaluated over a predefined set of modes
1882 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1883 * is selected and using intra/inter prediction filters, prediction is carried out.
1884 * The deviation between src and pred signal constitutes error signal. This error
1885 * signal is transformed (hierarchical transform if necessary) and quantized. The
1886 * quantized residue is packed in to entropy buffer for entropy coding. This is
1887 * repeated for all the mb's enlisted under the job.
1888 *
1889 * @param[in] ps_proc
1890 * Process context corresponding to the job
1891 *
1892 * @returns error status
1893 *
1894 * @remarks none
1895 *
1896 *******************************************************************************
1897 */
ih264e_process(process_ctxt_t * ps_proc)1898 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1899 {
1900 /* error status */
1901 WORD32 error_status = IH264_SUCCESS;
1902
1903 /* codec context */
1904 codec_t *ps_codec = ps_proc->ps_codec;
1905
1906 /* cbp luma, chroma */
1907 UWORD32 u4_cbp_l, u4_cbp_c;
1908
1909 /* width in mbs */
1910 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1911
1912 /* loop var */
1913 WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1914
1915 /* valid modes */
1916 UWORD32 u4_valid_modes = 0;
1917
1918 /* gate threshold */
1919 WORD32 i4_gate_threshold = 0;
1920
1921 /* is intra */
1922 WORD32 luma_idx, chroma_idx, is_intra;
1923
1924 /* temp variables */
1925 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1926
1927 /*
1928 * list of modes for evaluation
1929 * -------------------------------------------------------------------------
1930 * Note on enabling I4x4 and I16x16
1931 * At very low QP's the hadamard transform in I16x16 will push up the maximum
1932 * coeff value very high. CAVLC may not be able to represent the value and
1933 * hence the stream may not be decodable in some clips.
1934 * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1935 */
1936 if (ps_proc->i4_slice_type == ISLICE)
1937 {
1938 if (ps_proc->u4_frame_qp > 10)
1939 {
1940 /* enable intra 16x16 */
1941 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1942
1943 /* enable intra 8x8 */
1944 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1945 }
1946
1947 /* enable intra 4x4 */
1948 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1949 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1950
1951 }
1952 else if (ps_proc->i4_slice_type == PSLICE)
1953 {
1954 if (ps_proc->u4_frame_qp > 10)
1955 {
1956 /* enable intra 16x16 */
1957 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1958 }
1959
1960 /* enable intra 4x4 */
1961 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1962 {
1963 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1964 }
1965 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1966
1967 /* enable inter P16x16 */
1968 u4_valid_modes |= (1 << P16x16);
1969 }
1970 else if (ps_proc->i4_slice_type == BSLICE)
1971 {
1972 if (ps_proc->u4_frame_qp > 10)
1973 {
1974 /* enable intra 16x16 */
1975 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1976 }
1977
1978 /* enable intra 4x4 */
1979 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1980 {
1981 u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1982 }
1983 u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1984
1985 /* enable inter B16x16 */
1986 u4_valid_modes |= (1 << B16x16);
1987 }
1988
1989
1990 /* init entropy */
1991 ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1992 ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1993 ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1994
1995 /* compute recon when :
1996 * 1. current frame is to be used as a reference
1997 * 2. dump recon for bit stream sanity check
1998 */
1999 ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2000 ps_codec->s_cfg.u4_enable_recon;
2001
2002 /* Encode 'n' macroblocks,
2003 * 'n' being the number of mbs dictated by current proc ctxt */
2004 for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2005 {
2006 /* since we have not yet found sad, we have not yet got min sad */
2007 /* we need to initialize these variables for each MB */
2008 /* TODO how to get the min sad into the codec */
2009 ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2010 ps_proc->u4_min_sad_reached = 0;
2011
2012 /* mb analysis */
2013 {
2014 /* temp var */
2015 WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2016
2017 /* force intra refresh ? */
2018 WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2019 (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2020
2021 /* evaluate inter 16x16 modes */
2022 if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2023 {
2024 /* compute nmb me */
2025 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2026 {
2027 ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2028 i4_wd_mbs - ps_proc->i4_mb_x));
2029 }
2030
2031 /* set pointers to ME data appropriately for other modules to use */
2032 {
2033 UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2034
2035 /* get the min sad condition for current mb */
2036 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2037 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2038
2039 ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2040 ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2041 ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2042
2043 ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2044 ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2045 ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2046 ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2047 ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2048
2049 /* get the best sub pel buffer */
2050 ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2051 ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2052 }
2053 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2054 }
2055 else
2056 {
2057 /* Derive neighbor availability for the current macroblock */
2058 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2059
2060 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2061 }
2062
2063 /*
2064 * If air says intra, we need to force the following code path to evaluate intra
2065 * The easy way is just to say that the inter cost is too much
2066 */
2067 if (!i4_air_enable_inter)
2068 {
2069 ps_proc->u4_min_sad_reached = 0;
2070 ps_proc->i4_mb_cost = INT_MAX;
2071 ps_proc->i4_mb_distortion = INT_MAX;
2072 }
2073 else if (ps_proc->u4_mb_type == PSKIP)
2074 {
2075 goto UPDATE_MB_INFO;
2076 }
2077
2078 /* wait until the proc of [top + 1] mb is computed.
2079 * We wait till the proc dependencies are satisfied */
2080 if(ps_proc->i4_mb_y > 0)
2081 {
2082 /* proc map */
2083 UWORD8 *pu1_proc_map_top;
2084
2085 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2086
2087 while (1)
2088 {
2089 volatile UWORD8 *pu1_buf;
2090 WORD32 idx = i4_mb_idx + 1;
2091
2092 idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2093 pu1_buf = pu1_proc_map_top + idx;
2094 if(*pu1_buf)
2095 break;
2096 ithread_yield();
2097 }
2098 }
2099
2100 /* If we already have the minimum sad, there is no point in searching for sad again */
2101 if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2102 {
2103 /* intra gating in inter slices */
2104 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2105 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2106 {
2107 /* distortion of neighboring blocks */
2108 WORD32 i4_distortion[4];
2109
2110 i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2111
2112 i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2113
2114 i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2115
2116 i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2117
2118 i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2119
2120 }
2121
2122
2123 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2124 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2125 {
2126 /* evaluate intra 4x4 modes */
2127 if (u4_valid_modes & (1 << I4x4))
2128 {
2129 if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2130 {
2131 ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2132 }
2133 else
2134 {
2135 ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2136 }
2137 }
2138
2139 /* evaluate intra 16x16 modes */
2140 if (u4_valid_modes & (1 << I16x16))
2141 {
2142 ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2143 }
2144
2145 /* evaluate intra 8x8 modes */
2146 if (u4_valid_modes & (1 << I8x8))
2147 {
2148 ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2149 }
2150
2151 }
2152 }
2153 }
2154
2155 /* is intra */
2156 if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2157 {
2158 luma_idx = ps_proc->u4_mb_type;
2159 chroma_idx = 0;
2160 is_intra = 1;
2161
2162 /* evaluate chroma blocks for intra */
2163 ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2164 }
2165 else
2166 {
2167 luma_idx = 3;
2168 chroma_idx = 1;
2169 is_intra = 0;
2170 }
2171 ps_proc->u4_is_intra = is_intra;
2172 ps_proc->ps_pu->b1_intra_flag = is_intra;
2173
2174 /* redo MV pred of neighbors in the case intra mb */
2175 /* TODO : currently called unconditionally, needs to be called only in the case of intra
2176 * to modify neighbors */
2177 if (ps_proc->i4_slice_type != ISLICE)
2178 {
2179 ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2180 }
2181
2182 /* Perform luma mb core coding */
2183 u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2184
2185 /* Perform luma mb core coding */
2186 u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2187
2188 /* coded block pattern */
2189 ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2190
2191 if (!ps_proc->u4_is_intra)
2192 {
2193 if (ps_proc->i4_slice_type == BSLICE)
2194 {
2195 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2196 {
2197 ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2198 }
2199 }
2200 else if(!ps_proc->u4_cbp)
2201 {
2202 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2203 {
2204 ps_proc->u4_mb_type = PSKIP;
2205 }
2206 }
2207 }
2208
2209 UPDATE_MB_INFO:
2210
2211 /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2212 ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2213
2214 /**********************************************************************/
2215 /* if disable deblock level is '0' this implies enable deblocking for */
2216 /* all edges of all macroblocks with out any restrictions */
2217 /* */
2218 /* if disable deblock level is '1' this implies disable deblocking for*/
2219 /* all edges of all macroblocks with out any restrictions */
2220 /* */
2221 /* if disable deblock level is '2' this implies enable deblocking for */
2222 /* all edges of all macroblocks except edges overlapping with slice */
2223 /* boundaries. This option is not currently supported by the encoder */
2224 /* hence the slice map should be of no significance to perform debloc */
2225 /* king */
2226 /**********************************************************************/
2227
2228 if (ps_proc->u4_compute_recon)
2229 {
2230 /* deblk context */
2231 /* src pointers */
2232 UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2233 UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2234
2235 /* src indices */
2236 UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2237 UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2238
2239 /* compute blocking strength */
2240 if (ps_proc->u4_disable_deblock_level != 1)
2241 {
2242 ih264e_compute_bs(ps_proc);
2243 }
2244
2245 /* nmb deblocking and hpel and padding */
2246 ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2247 pu1_cur_pic_chroma, i4_mb_x,
2248 i4_mb_y);
2249 }
2250
2251 /* update the context after for coding next mb */
2252 error_status |= ih264e_update_proc_ctxt(ps_proc);
2253
2254 /* Once the last row is processed, mark the buffer status appropriately */
2255 if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2256 {
2257 /* Pointer to current picture buffer structure */
2258 pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2259
2260 /* Pointer to current picture's mv buffer structure */
2261 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2262
2263 /**********************************************************************/
2264 /* if disable deblock level is '0' this implies enable deblocking for */
2265 /* all edges of all macroblocks with out any restrictions */
2266 /* */
2267 /* if disable deblock level is '1' this implies disable deblocking for*/
2268 /* all edges of all macroblocks with out any restrictions */
2269 /* */
2270 /* if disable deblock level is '2' this implies enable deblocking for */
2271 /* all edges of all macroblocks except edges overlapping with slice */
2272 /* boundaries. This option is not currently supported by the encoder */
2273 /* hence the slice map should be of no significance to perform debloc */
2274 /* king */
2275 /**********************************************************************/
2276 error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2277
2278 error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2279
2280 if (ps_codec->s_cfg.u4_enable_recon)
2281 {
2282 /* pic cnt */
2283 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2284
2285 /* rec buffers */
2286 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;
2287
2288 /* is last? */
2289 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2290
2291 /* frame time stamp */
2292 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2293 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2294 }
2295
2296 }
2297 }
2298
2299 DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2300
2301 return error_status;
2302 }
2303
2304 /**
2305 *******************************************************************************
2306 *
2307 * @brief
2308 * Function to update rc context after encoding
2309 *
2310 * @par Description
2311 * This function updates the rate control context after the frame is encoded.
2312 * Number of bits consumed by the current frame, frame distortion, frame cost,
2313 * number of intra/inter mb's, ... are passed on to rate control context for
2314 * updating the rc model.
2315 *
2316 * @param[in] ps_codec
2317 * Handle to codec context
2318 *
2319 * @param[in] ctxt_sel
2320 * frame context selector
2321 *
2322 * @param[in] pic_cnt
2323 * pic count
2324 *
2325 * @returns i4_stuffing_byte
2326 * number of stuffing bytes (if necessary)
2327 *
2328 * @remarks
2329 *
2330 *******************************************************************************
2331 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2332 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2333 {
2334 /* proc set base idx */
2335 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2336
2337 /* proc ctxt */
2338 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2339
2340 /* frame qp */
2341 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2342
2343 /* cbr rc return status */
2344 WORD32 i4_stuffing_byte = 0;
2345
2346 /* current frame stats */
2347 frame_info_t s_frame_info;
2348 picture_type_e rc_pic_type;
2349
2350 /* temp var */
2351 WORD32 i, j;
2352
2353 /********************************************************************/
2354 /* BEGIN INIT */
2355 /********************************************************************/
2356
2357 /* init frame info */
2358 irc_init_frame_info(&s_frame_info);
2359
2360 /* get frame info */
2361 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2362 {
2363 /*****************************************************************/
2364 /* One frame can be encoded by max of u4_num_cores threads */
2365 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */
2366 /* u4_num_cores threads */
2367 /*****************************************************************/
2368 for (j = 0; j< MAX_MB_TYPE; j++)
2369 {
2370 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2371
2372 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2373
2374 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2375 }
2376
2377 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2378
2379 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2380
2381 /*****************************************************************/
2382 /* gather number of residue and header bits consumed by the frame*/
2383 /*****************************************************************/
2384 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2385 }
2386
2387 /* get pic type */
2388 switch (ps_codec->pic_type)
2389 {
2390 case PIC_I:
2391 case PIC_IDR:
2392 rc_pic_type = I_PIC;
2393 break;
2394 case PIC_P:
2395 rc_pic_type = P_PIC;
2396 break;
2397 case PIC_B:
2398 rc_pic_type = B_PIC;
2399 break;
2400 default:
2401 assert(0);
2402 break;
2403 }
2404
2405 /* update rc lib with current frame stats */
2406 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2407 &(s_frame_info),
2408 ps_codec->s_rate_control.pps_pd_frm_rate,
2409 ps_codec->s_rate_control.pps_time_stamp,
2410 ps_codec->s_rate_control.pps_frame_time,
2411 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2412 &rc_pic_type,
2413 i4_is_first_frm,
2414 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2415 u1_frame_qp,
2416 &ps_codec->s_rate_control.num_intra_in_prev_frame,
2417 &ps_codec->s_rate_control.i4_avg_activity);
2418 return i4_stuffing_byte;
2419 }
2420
2421 /**
2422 *******************************************************************************
2423 *
2424 * @brief
2425 * entry point of a spawned encoder thread
2426 *
2427 * @par Description:
2428 * The encoder thread dequeues a proc/entropy job from the encoder queue and
2429 * calls necessary routines.
2430 *
2431 * @param[in] pv_proc
2432 * Process context corresponding to the thread
2433 *
2434 * @returns error status
2435 *
2436 * @remarks
2437 *
2438 *******************************************************************************
2439 */
ih264e_process_thread(void * pv_proc)2440 WORD32 ih264e_process_thread(void *pv_proc)
2441 {
2442 /* error status */
2443 IH264_ERROR_T ret = IH264_SUCCESS;
2444 WORD32 error_status = IH264_SUCCESS;
2445
2446 /* proc ctxt */
2447 process_ctxt_t *ps_proc = pv_proc;
2448
2449 /* codec ctxt */
2450 codec_t *ps_codec = ps_proc->ps_codec;
2451
2452 /* structure to represent a processing job entry */
2453 job_t s_job;
2454
2455 /* blocking call : entropy dequeue is non-blocking till all
2456 * the proc jobs are processed */
2457 WORD32 is_blocking = 0;
2458
2459 /* set affinity */
2460 ithread_set_affinity(ps_proc->i4_id);
2461
2462 while(1)
2463 {
2464 /* dequeue a job from the entropy queue */
2465 {
2466 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2467
2468 /* codec context selector */
2469 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2470
2471 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2472
2473 /* have the lock */
2474 if (error == 0)
2475 {
2476 if (*pu4_buf == 0)
2477 {
2478 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2479 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2480 if (IH264_SUCCESS == ret)
2481 {
2482 *pu4_buf = 1;
2483 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2484 goto WORKER;
2485 }
2486 else if(is_blocking)
2487 {
2488 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2489 break;
2490 }
2491 }
2492 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2493 }
2494 }
2495
2496 /* dequeue a job from the process queue */
2497 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2498 if (IH264_SUCCESS != ret)
2499 {
2500 if(ps_proc->i4_id)
2501 break;
2502 else
2503 {
2504 is_blocking = 1;
2505 continue;
2506 }
2507 }
2508
2509 WORKER:
2510 /* choose appropriate proc context based on proc_base_idx */
2511 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2512
2513 switch (s_job.i4_cmd)
2514 {
2515 case CMD_PROCESS:
2516 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2517 ps_proc->i4_mb_x = s_job.i2_mb_x;
2518 ps_proc->i4_mb_y = s_job.i2_mb_y;
2519
2520 /* init process context */
2521 ih264e_init_proc_ctxt(ps_proc);
2522
2523 /* core code all mbs enlisted under the current job */
2524 error_status |= ih264e_process(ps_proc);
2525 break;
2526
2527 case CMD_ENTROPY:
2528 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2529 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2530 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2531
2532 /* init entropy */
2533 ih264e_init_entropy_ctxt(ps_proc);
2534
2535 /* entropy code all mbs enlisted under the current job */
2536 error_status |= ih264e_entropy(ps_proc);
2537 break;
2538
2539 default:
2540 error_status |= IH264_FAIL;
2541 break;
2542 }
2543 }
2544
2545 /* send error code */
2546 ps_proc->i4_error_code = error_status;
2547 return ret;
2548 }
2549