1 /******************************************************************************
2 *
3 * Copyright (C) 2015 The Android Open Source Project
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at:
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 *****************************************************************************
18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20
21 /**
22 *******************************************************************************
23 * @file
24 * ih264e_process.c
25 *
26 * @brief
27 * Contains functions for codec thread
28 *
29 * @author
30 * Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 * None
48 *
49 *******************************************************************************
50 */
51
52 /*****************************************************************************/
53 /* File Includes */
54 /*****************************************************************************/
55
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115
116
117 /*****************************************************************************/
118 /* Function Definitions */
119 /*****************************************************************************/
120
121 /**
122 ******************************************************************************
123 *
124 * @brief This function generates sps, pps set on request
125 *
126 * @par Description
127 * When the encoder is set in header generation mode, the following function
128 * is called. This generates sps and pps headers and returns the control back
129 * to caller.
130 *
131 * @param[in] ps_codec
132 * pointer to codec context
133 *
134 * @return success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140 /* choose between ping-pong process buffer set */
141 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142
143 /* entropy ctxt */
144 entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145
146 /* Bitstream structure */
147 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148
149 /* sps */
150 sps_t *ps_sps = NULL;
151
152 /* pps */
153 pps_t *ps_pps = NULL;
154
155 /* output buff */
156 out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157
158
159 /********************************************************************/
160 /* initialize the bit stream buffer */
161 /********************************************************************/
162 ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163
164 /********************************************************************/
165 /* BEGIN HEADER GENERATION */
166 /********************************************************************/
167 /*ps_codec->i4_pps_id ++;*/
168 ps_codec->i4_pps_id %= MAX_PPS_CNT;
169
170 /*ps_codec->i4_sps_id ++;*/
171 ps_codec->i4_sps_id %= MAX_SPS_CNT;
172
173 /* populate sps header */
174 ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175 ih264e_populate_sps(ps_codec, ps_sps);
176
177 /* populate pps header */
178 ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179 ih264e_populate_pps(ps_codec, ps_pps);
180
181 ps_entropy->i4_error_code = IH264E_SUCCESS;
182
183 /* generate sps */
184 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
185
186 /* generate pps */
187 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
188
189 /* queue output buffer */
190 ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
191
192 return ps_entropy->i4_error_code;
193 }
194
195 /**
196 *******************************************************************************
197 *
198 * @brief initialize entropy context.
199 *
200 * @par Description:
201 * Before invoking the call to perform to entropy coding the entropy context
202 * associated with the job needs to be initialized. This involves the start
203 * mb address, end mb address, slice index and the pointer to location at
204 * which the mb residue info and mb header info are packed.
205 *
206 * @param[in] ps_proc
207 * Pointer to the current process context
208 *
209 * @returns error status
210 *
211 * @remarks none
212 *
213 *******************************************************************************
214 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)215 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
216 {
217 /* codec context */
218 codec_t *ps_codec = ps_proc->ps_codec;
219
220 /* entropy ctxt */
221 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
222
223 /* start address */
224 ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
225
226 /* end address */
227 ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
228
229 /* slice index */
230 ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
231
232 /* sof */
233 /* @ start of frame or start of a new slice, set sof flag */
234 if (ps_entropy->i4_mb_start_add == 0)
235 {
236 ps_entropy->i4_sof = 1;
237 }
238
239 if (ps_entropy->i4_mb_x == 0)
240 {
241 /* packed mb coeff data */
242 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
243 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
244
245 /* packed mb header data */
246 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
247 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
248 }
249
250 return IH264E_SUCCESS;
251 }
252
253 /**
254 *******************************************************************************
255 *
256 * @brief entry point for entropy coding
257 *
258 * @par Description
259 * This function calls lower level functions to perform entropy coding for a
260 * group (n rows) of mb's. After encoding 1 row of mb's, the function takes
261 * back the control, updates the ctxt and calls lower level functions again.
262 * This process is repeated till all the rows or group of mb's (which ever is
263 * minimum) are coded
264 *
265 * @param[in] ps_proc
266 * process context
267 *
268 * @returns error status
269 *
270 * @remarks
271 *
272 *******************************************************************************
273 */
274
ih264e_entropy(process_ctxt_t * ps_proc)275 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
276 {
277 /* codec context */
278 codec_t *ps_codec = ps_proc->ps_codec;
279
280 /* entropy context */
281 entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
282
283 /* cabac context */
284 cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
285
286 /* sps */
287 sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
288
289 /* pps */
290 pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
291
292 /* slice header */
293 slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
294
295 /* slice type */
296 WORD32 i4_slice_type = ps_proc->i4_slice_type;
297
298 /* Bitstream structure */
299 bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
300
301 /* output buff */
302 out_buf_t s_out_buf;
303
304 /* proc map */
305 UWORD8 *pu1_proc_map;
306
307 /* entropy map */
308 UWORD8 *pu1_entropy_map_curr;
309
310 /* proc base idx */
311 WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
312
313 /* temp var */
314 WORD32 i4_wd_mbs, i4_ht_mbs;
315 UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
316 WORD32 bitstream_start_offset, bitstream_end_offset;
317 /********************************************************************/
318 /* BEGIN INIT */
319 /********************************************************************/
320
321 /* entropy encode start address */
322 u4_mb_idx = ps_entropy->i4_mb_start_add;
323
324 /* entropy encode end address */
325 u4_mb_end_idx = ps_entropy->i4_mb_end_add;
326
327 /* width in mbs */
328 i4_wd_mbs = ps_entropy->i4_wd_mbs;
329
330 /* height in mbs */
331 i4_ht_mbs = ps_entropy->i4_ht_mbs;
332
333 /* total mb cnt */
334 u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
335
336 /* proc map */
337 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
338
339 /* entropy map */
340 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
341
342 /********************************************************************/
343 /* @ start of frame / slice, */
344 /* initialize the output buffer, */
345 /* initialize the bit stream buffer, */
346 /* check if sps and pps headers have to be generated, */
347 /* populate and generate slice header */
348 /********************************************************************/
349 if (ps_entropy->i4_sof)
350 {
351 /********************************************************************/
352 /* initialize the output buffer */
353 /********************************************************************/
354 s_out_buf = ps_codec->as_out_buf[ctxt_sel];
355
356 /* is last frame to encode */
357 s_out_buf.u4_is_last = ps_entropy->u4_is_last;
358
359 /* frame idx */
360 s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
361 s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
362
363 /********************************************************************/
364 /* initialize the bit stream buffer */
365 /********************************************************************/
366 ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
367
368 /********************************************************************/
369 /* BEGIN HEADER GENERATION */
370 /********************************************************************/
371 if (1 == ps_entropy->i4_gen_header)
372 {
373 /* generate sps */
374 ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
375
376 /* generate pps */
377 ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
378
379 /* reset i4_gen_header */
380 ps_entropy->i4_gen_header = 0;
381 }
382
383 /* populate slice header */
384 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
385
386 /* generate slice header */
387 ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
388 ps_pps, ps_sps);
389
390 /* once start of frame / slice is done, you can reset it */
391 /* it is the responsibility of the caller to set this flag */
392 ps_entropy->i4_sof = 0;
393
394 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
395 {
396 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
397 BITSTREAM_FLUSH(ps_bitstrm);
398 ih264e_init_cabac_ctxt(ps_entropy);
399 }
400 }
401
402 /* begin entropy coding for the mb set */
403 while (u4_mb_idx < u4_mb_end_idx)
404 {
405 /* init ptrs/indices */
406 if (ps_entropy->i4_mb_x == i4_wd_mbs)
407 {
408 ps_entropy->i4_mb_y++;
409 ps_entropy->i4_mb_x = 0;
410
411 /* packed mb coeff data */
412 ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
413 ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
414
415 /* packed mb header data */
416 ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
417 ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
418
419 /* proc map */
420 pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
421
422 /* entropy map */
423 pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
424 }
425
426 DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
427 ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
428 ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
429
430 /* wait until the curr mb is core coded */
431 /* The wait for curr mb to be core coded is essential when entropy is launched
432 * as a separate job
433 */
434 while (1)
435 {
436 volatile UWORD8 *pu1_buf1;
437 WORD32 idx = ps_entropy->i4_mb_x;
438
439 pu1_buf1 = pu1_proc_map + idx;
440 if (*pu1_buf1)
441 break;
442 ithread_yield();
443 }
444
445
446 /* write mb layer */
447 ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
448 /* Starting bitstream offset for header in bits */
449 bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
450
451 /* set entropy map */
452 pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
453
454 u4_mb_idx++;
455 ps_entropy->i4_mb_x++;
456 /* check for eof */
457 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
458 {
459 if (ps_entropy->i4_mb_x < i4_wd_mbs)
460 {
461 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
462 }
463 }
464
465 if (ps_entropy->i4_mb_x == i4_wd_mbs)
466 {
467 /* if slices are enabled */
468 if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
469 {
470 /* current slice index */
471 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
472
473 /* slice map */
474 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
475
476 /* No need to open a slice at end of frame. The current slice can be closed at the time
477 * of signaling eof flag.
478 */
479 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
480 != pu1_slice_idx[u4_mb_idx]))
481 {
482 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
483 { /* mb skip run */
484 if ((i4_slice_type != ISLICE)
485 && *ps_entropy->pi4_mb_skip_run)
486 {
487 if (*ps_entropy->pi4_mb_skip_run)
488 {
489 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
490 *ps_entropy->pi4_mb_skip_run = 0;
491 }
492 }
493 /* put rbsp trailing bits for the previous slice */
494 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
495 }
496 else
497 {
498 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
499 }
500
501 /* update slice header pointer */
502 i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
503 ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
504 ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
505
506 /* populate slice header */
507 ps_entropy->i4_mb_start_add = u4_mb_idx;
508 ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
509 ps_sps);
510
511 /* generate slice header */
512 ps_entropy->i4_error_code |= ih264e_generate_slice_header(
513 ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
514 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
515 {
516 BITSTREAM_BYTE_ALIGN(ps_bitstrm);
517 BITSTREAM_FLUSH(ps_bitstrm);
518 ih264e_init_cabac_ctxt(ps_entropy);
519 }
520 }
521 else
522 {
523 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
524 && u4_mb_idx != u4_mb_cnt)
525 {
526 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
527 }
528 }
529 }
530 /* Dont execute any further instructions until store synchronization took place */
531 DATA_SYNC();
532 }
533
534 /* Ending bitstream offset for header in bits */
535 bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
536 ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
537 bitstream_end_offset - bitstream_start_offset;
538 }
539
540 /* check for eof */
541 if (u4_mb_idx == u4_mb_cnt)
542 {
543 /* set end of frame flag */
544 ps_entropy->i4_eof = 1;
545 }
546 else
547 {
548 if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
549 && ps_codec->s_cfg.e_slice_mode
550 != IVE_SLICE_MODE_BLOCKS)
551 {
552 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
553 }
554 }
555
556 if (ps_entropy->i4_eof)
557 {
558 if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
559 {
560 /* mb skip run */
561 if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
562 {
563 if (*ps_entropy->pi4_mb_skip_run)
564 {
565 PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
566 ps_entropy->i4_error_code, "mb skip run");
567 *ps_entropy->pi4_mb_skip_run = 0;
568 }
569 }
570 /* put rbsp trailing bits */
571 ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
572 }
573 else
574 {
575 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
576 }
577
578 /* update current frame stats to rc library */
579 {
580 /* number of bytes to stuff */
581 WORD32 i4_stuff_bytes;
582
583 /* update */
584 i4_stuff_bytes = ih264e_update_rc_post_enc(
585 ps_codec, ctxt_sel,
586 (ps_proc->ps_codec->i4_poc == 0));
587
588 /* cbr rc - house keeping */
589 if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
590 {
591 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
592 }
593 else if (i4_stuff_bytes)
594 {
595 /* add filler nal units */
596 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
597 }
598 }
599
600 /*
601 *Frame number is to be incremented only if the current frame is a
602 * reference frame. After each successful frame encode, we increment
603 * frame number by 1
604 */
605 if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
606 && ps_codec->u4_is_curr_frm_ref)
607 {
608 ps_codec->i4_frame_num++;
609 }
610 /********************************************************************/
611 /* signal the output */
612 /********************************************************************/
613 ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
614 ps_entropy->ps_bitstrm->u4_strm_buf_offset;
615
616 DEBUG("entropy status %x", ps_entropy->i4_error_code);
617 }
618
619 /* allow threads to dequeue entropy jobs */
620 ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
621
622 return ps_entropy->i4_error_code;
623 }
624
625 /**
626 *******************************************************************************
627 *
628 * @brief Packs header information of a mb in to a buffer
629 *
630 * @par Description:
631 * After the deciding the mode info of a macroblock, the syntax elements
632 * associated with the mb are packed and stored. The entropy thread unpacks
633 * this buffer and generates the end bit stream.
634 *
635 * @param[in] ps_proc
636 * Pointer to the current process context
637 *
638 * @returns error status
639 *
640 * @remarks none
641 *
642 *******************************************************************************
643 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)644 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
645 {
646 /* curr mb type */
647 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
648
649 /* pack mb syntax layer of curr mb (used for entropy coding) */
650 if (u4_mb_type == I4x4)
651 {
652 /* pointer to mb header storage space */
653 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
654
655 /* temp var */
656 WORD32 i4, byte;
657
658 /* mb type plus mode */
659 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
660
661 /* cbp */
662 *pu1_ptr++ = ps_proc->u4_cbp;
663
664 /* mb qp delta */
665 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
666
667 /* sub mb modes */
668 for (i4 = 0; i4 < 16; i4 ++)
669 {
670 byte = 0;
671
672 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
673 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
674 {
675 byte |= 1;
676 }
677 else
678 {
679
680 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
681 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
682 {
683 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
684 }
685 else
686 {
687 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
688 }
689 }
690
691 i4++;
692
693 if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
694 ps_proc->au1_intra_luma_mb_4x4_modes[i4])
695 {
696 byte |= 16;
697 }
698 else
699 {
700
701 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
702 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
703 {
704 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
705 }
706 else
707 {
708 byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
709 }
710 }
711
712 *pu1_ptr++ = byte;
713 }
714
715 /* end of mb layer */
716 ps_proc->pv_mb_header_data = pu1_ptr;
717 }
718 else if (u4_mb_type == I16x16)
719 {
720 /* pointer to mb header storage space */
721 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
722
723 /* mb type plus mode */
724 *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
725
726 /* cbp */
727 *pu1_ptr++ = ps_proc->u4_cbp;
728
729 /* mb qp delta */
730 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731
732 /* end of mb layer */
733 ps_proc->pv_mb_header_data = pu1_ptr;
734 }
735 else if (u4_mb_type == P16x16)
736 {
737 /* pointer to mb header storage space */
738 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
739
740 WORD16 *i2_mv_ptr;
741
742 /* mb type plus mode */
743 *pu1_ptr++ = u4_mb_type;
744
745 /* cbp */
746 *pu1_ptr++ = ps_proc->u4_cbp;
747
748 /* mb qp delta */
749 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
750
751 i2_mv_ptr = (WORD16 *)pu1_ptr;
752
753 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
754
755 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
756
757 /* end of mb layer */
758 ps_proc->pv_mb_header_data = i2_mv_ptr;
759 }
760 else if (u4_mb_type == PSKIP)
761 {
762 /* pointer to mb header storage space */
763 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
764
765 /* mb type plus mode */
766 *pu1_ptr++ = u4_mb_type;
767
768 /* end of mb layer */
769 ps_proc->pv_mb_header_data = pu1_ptr;
770 }
771 else if(u4_mb_type == B16x16)
772 {
773
774 /* pointer to mb header storage space */
775 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
776
777 WORD16 *i2_mv_ptr;
778
779 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
780
781 /* mb type plus mode */
782 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
783
784 /* cbp */
785 *pu1_ptr++ = ps_proc->u4_cbp;
786
787 /* mb qp delta */
788 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
789
790 /* l0 & l1 me data */
791 i2_mv_ptr = (WORD16 *)pu1_ptr;
792
793 if (u4_pred_mode != PRED_L1)
794 {
795 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
796 - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
797
798 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
799 - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
800 }
801 if (u4_pred_mode != PRED_L0)
802 {
803 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
804 - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
805
806 *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
807 - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
808 }
809
810 /* end of mb layer */
811 ps_proc->pv_mb_header_data = i2_mv_ptr;
812
813 }
814 else if(u4_mb_type == BDIRECT)
815 {
816 /* pointer to mb header storage space */
817 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
818
819 /* mb type plus mode */
820 *pu1_ptr++ = u4_mb_type;
821
822 /* cbp */
823 *pu1_ptr++ = ps_proc->u4_cbp;
824
825 /* mb qp delta */
826 *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
827
828 ps_proc->pv_mb_header_data = pu1_ptr;
829
830 }
831 else if(u4_mb_type == BSKIP)
832 {
833 UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
834
835 /* pointer to mb header storage space */
836 UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
837
838 /* mb type plus mode */
839 *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
840
841 /* end of mb layer */
842 ps_proc->pv_mb_header_data = pu1_ptr;
843 }
844
845 return IH264E_SUCCESS;
846 }
847
848 /**
849 *******************************************************************************
850 *
851 * @brief update process context after encoding an mb. This involves preserving
852 * the current mb information for later use, initialize the proc ctxt elements to
853 * encode next mb.
854 *
855 * @par Description:
856 * This function performs house keeping tasks after encoding an mb.
857 * After encoding an mb, various elements of the process context needs to be
858 * updated to encode the next mb. For instance, the source, recon and reference
859 * pointers, mb indices have to be adjusted to the next mb. The slice index of
860 * the current mb needs to be updated. If mb qp modulation is enabled, then if
861 * the qp changes the quant param structure needs to be updated. Also to encoding
862 * the next mb, the current mb info is used as part of mode prediction or mv
863 * prediction. Hence the current mb info has to preserved at top/top left/left
864 * locations.
865 *
866 * @param[in] ps_proc
867 * Pointer to the current process context
868 *
869 * @returns none
870 *
871 * @remarks none
872 *
873 *******************************************************************************
874 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)875 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
876 {
877 /* error status */
878 WORD32 error_status = IH264_SUCCESS;
879
880 /* codec context */
881 codec_t *ps_codec = ps_proc->ps_codec;
882
883 /* curr mb indices */
884 WORD32 i4_mb_x = ps_proc->i4_mb_x;
885 WORD32 i4_mb_y = ps_proc->i4_mb_y;
886
887 /* mb syntax elements of neighbors */
888 mb_info_t *ps_left_syn = &ps_proc->s_left_mb_syntax_ele;
889 mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
890 mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
891
892 /* curr mb type */
893 UWORD32 u4_mb_type = ps_proc->u4_mb_type;
894
895 /* curr mb type */
896 UWORD32 u4_is_intra = ps_proc->u4_is_intra;
897
898 /* width in mbs */
899 WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
900
901 /*height in mbs*/
902 WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
903
904 /* proc map */
905 UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
906
907 /* deblk context */
908 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
909
910 /* deblk bs context */
911 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
912
913 /* top row motion vector info */
914 enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
915
916 /* top left mb motion vector */
917 enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
918
919 /* left mb motion vector */
920 enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
921
922 /* sub mb modes */
923 UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
924
925 /*************************************************************/
926 /* During MV prediction, when top right mb is not available, */
927 /* top left mb info. is used for prediction. Hence the curr */
928 /* top, which will be top left for the next mb needs to be */
929 /* preserved before updating it with curr mb info. */
930 /*************************************************************/
931
932 /* mb type, mb class, csbp */
933 *ps_top_left_syn = *ps_top_syn;
934
935 if (ps_proc->i4_slice_type != ISLICE)
936 {
937 /*****************************************/
938 /* update top left with top info results */
939 /*****************************************/
940 /* mv */
941 *ps_top_left_mb_pu = *ps_top_row_pu;
942 }
943
944 /*************************************************/
945 /* update top and left with curr mb info results */
946 /*************************************************/
947
948 /* mb type */
949 ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
950
951 /* mb class */
952 ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
953
954 /* csbp */
955 ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
956
957 /* distortion */
958 ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
959
960 if (u4_is_intra)
961 {
962 /* mb / sub mb modes */
963 if (I16x16 == u4_mb_type)
964 {
965 pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
966 }
967 else if (I4x4 == u4_mb_type)
968 {
969 ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
970 ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971 }
972 else if (I8x8 == u4_mb_type)
973 {
974 memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
975 memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976 }
977
978 if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
979 {
980 /* mv */
981 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
982 }
983
984 *ps_proc->pu4_mb_pu_cnt = 1;
985 }
986 else
987 {
988 /* mv */
989 *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990 }
991
992 /*
993 * Mark that the MB has been coded intra
994 * So that future AIRs can skip it
995 */
996 ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
997
998 /**************************************************/
999 /* pack mb header info. for entropy coding */
1000 /**************************************************/
1001 ih264e_pack_header_data(ps_proc);
1002
1003 /* update previous mb qp */
1004 ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1005
1006 /* store qp */
1007 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1008
1009 /*
1010 * We need to sync the cache to make sure that the nmv content of proc
1011 * is updated to cache properly
1012 */
1013 DATA_SYNC();
1014
1015 /* Just before finishing the row, enqueue the job in to entropy queue.
1016 * The master thread depending on its convenience shall dequeue it and
1017 * performs entropy.
1018 *
1019 * WARN !! Placing this block post proc map update can cause queuing of
1020 * entropy jobs in out of order.
1021 */
1022 if (i4_mb_x == i4_wd_mbs - 1)
1023 {
1024 /* job structures */
1025 job_t s_job;
1026
1027 /* job class */
1028 s_job.i4_cmd = CMD_ENTROPY;
1029
1030 /* number of mbs to be processed in the current job */
1031 s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1032
1033 /* job start index x */
1034 s_job.i2_mb_x = 0;
1035
1036 /* job start index y */
1037 s_job.i2_mb_y = ps_proc->i4_mb_y;
1038
1039 /* proc base idx */
1040 s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1041
1042 /* queue the job */
1043 error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1044
1045 if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1046 ih264_list_terminate(ps_codec->pv_entropy_jobq);
1047 }
1048
1049 /* update proc map */
1050 pu1_proc_map[i4_mb_x] = 1;
1051
1052 /**************************************************/
1053 /* update proc ctxt elements for encoding next mb */
1054 /**************************************************/
1055 /* update indices */
1056 i4_mb_x ++;
1057 ps_proc->i4_mb_x = i4_mb_x;
1058
1059 if (ps_proc->i4_mb_x == i4_wd_mbs)
1060 {
1061 ps_proc->i4_mb_y++;
1062 ps_proc->i4_mb_x = 0;
1063 }
1064
1065 /* update slice index */
1066 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1067
1068 /* update buffers pointers */
1069 ps_proc->pu1_src_buf_luma += MB_SIZE;
1070 ps_proc->pu1_rec_buf_luma += MB_SIZE;
1071 ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1072 ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1073
1074 /*
1075 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1076 * the stride per MB is MB_SIZE
1077 */
1078 ps_proc->pu1_src_buf_chroma += MB_SIZE;
1079 ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1080 ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1081 ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1082
1083
1084
1085 /* Reset cost, distortion params */
1086 ps_proc->i4_mb_cost = INT_MAX;
1087 ps_proc->i4_mb_distortion = SHRT_MAX;
1088
1089 ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1090
1091 ps_proc->pu4_mb_pu_cnt += 1;
1092
1093 /* Update colocated pu */
1094 if (ps_proc->i4_slice_type == BSLICE)
1095 ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1096
1097 /* deblk ctxts */
1098 if (ps_proc->u4_disable_deblock_level != 1)
1099 {
1100 /* indices */
1101 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1102 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1103
1104 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1105 ps_deblk->i4_mb_x ++;
1106
1107 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1108 /*
1109 * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1110 * the stride per MB is MB_SIZE
1111 */
1112 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1113 #endif
1114 }
1115
1116 return error_status;
1117 }
1118
1119 /**
1120 *******************************************************************************
1121 *
1122 * @brief initialize process context.
1123 *
1124 * @par Description:
1125 * Before dispatching the current job to process thread, the process context
1126 * associated with the job is initialized. Usually every job aims to encode one
1127 * row of mb's. Basing on the row indices provided by the job, the process
1128 * context's buffer ptrs, slice indices and other elements that are necessary
1129 * during core-coding are initialized.
1130 *
1131 * @param[in] ps_proc
1132 * Pointer to the current process context
1133 *
1134 * @returns error status
1135 *
1136 * @remarks none
1137 *
1138 *******************************************************************************
1139 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1140 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1141 {
1142 /* codec context */
1143 codec_t *ps_codec = ps_proc->ps_codec;
1144
1145 /* nmb processing context*/
1146 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1147
1148 /* indices */
1149 WORD32 i4_mb_x, i4_mb_y;
1150
1151 /* strides */
1152 WORD32 i4_src_strd = ps_proc->i4_src_strd;
1153 WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1154 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1155
1156 /* quant params */
1157 quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1158
1159 /* deblk ctxt */
1160 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1161
1162 /* deblk bs context */
1163 bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1164
1165 /* Pointer to mv_buffer of current frame */
1166 mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1167
1168 /* Pointers for color space conversion */
1169 UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1170
1171 /* Pad the MB to support non standard sizes */
1172 UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1173 UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1174 UWORD16 u2_num_rows = MB_SIZE;
1175 WORD32 convert_uv_only;
1176
1177 /********************************************************************/
1178 /* BEGIN INIT */
1179 /********************************************************************/
1180
1181 i4_mb_x = ps_proc->i4_mb_x;
1182 i4_mb_y = ps_proc->i4_mb_y;
1183
1184 /* Number of mbs processed in one loop of process function */
1185 ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1186 ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1187
1188 /* init buffer pointers */
1189 convert_uv_only = 1;
1190 if (u4_pad_bottom_sz || u4_pad_right_sz ||
1191 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1192 {
1193 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1194 u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1195 ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1196 i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1197 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1198 convert_uv_only = 0;
1199 }
1200 else
1201 {
1202 i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1203 ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1204 }
1205
1206
1207 if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1208 ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1209 ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1210 u4_pad_bottom_sz || u4_pad_right_sz)
1211 {
1212 if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1213 (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1214 ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1215
1216 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1217 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1218 }
1219 else
1220 {
1221 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1222 ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1223 }
1224
1225 ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1226 ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1227
1228 /* Tempral back and forward reference buffer */
1229 ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1230 ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1231 ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1232 ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1233
1234 /*
1235 * Do color space conversion
1236 * NOTE : We assume there that the number of MB's to process will not span multiple rows
1237 */
1238 switch (ps_codec->s_cfg.e_inp_color_fmt)
1239 {
1240 case IV_YUV_420SP_UV:
1241 case IV_YUV_420SP_VU:
1242 /* In case of 420 semi-planar input, copy last few rows to intermediate
1243 buffer as chroma trans functions access one extra byte due to interleaved input.
1244 This data will be padded if required */
1245 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1246 {
1247 WORD32 num_rows = MB_SIZE;
1248 UWORD8 *pu1_src;
1249 UWORD8 *pu1_dst;
1250 WORD32 i;
1251 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1252 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1253
1254 pu1_dst = ps_proc->pu1_src_buf_luma;
1255
1256 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1257 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1258 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1259 num_rows = MB_SIZE - u4_pad_bottom_sz;
1260 for (i = 0; i < num_rows; i++)
1261 {
1262 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1263 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1264 pu1_dst += ps_proc->i4_src_strd;
1265 }
1266 }
1267 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1268 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1269 pu1_dst = ps_proc->pu1_src_buf_chroma;
1270
1271 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1272 * due to interleaved input
1273 */
1274 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1275 num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1276 else
1277 num_rows = BLK8x8SIZE;
1278 for (i = 0; i < num_rows; i++)
1279 {
1280 memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1281 pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1282 pu1_dst += ps_proc->i4_src_chroma_strd;
1283 }
1284
1285 }
1286 break;
1287
1288 case IV_YUV_420P :
1289 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1290 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1291
1292 pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1293 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1294
1295 pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1296 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1297
1298 ps_codec->pf_ih264e_conv_420p_to_420sp(
1299 pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1300 ps_proc->pu1_src_buf_luma,
1301 ps_proc->pu1_src_buf_chroma, u2_num_rows,
1302 ps_codec->s_cfg.u4_disp_wd,
1303 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1304 ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1305 ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1306 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1307 convert_uv_only);
1308 break;
1309
1310 case IV_YUV_422ILE :
1311 pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1312 + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1313
1314 ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1315 ps_proc->pu1_src_buf_luma,
1316 ps_proc->pu1_src_buf_chroma,
1317 ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1318 ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1319 ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1320 ps_proc->i4_src_chroma_strd,
1321 ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1322 break;
1323
1324 default:
1325 break;
1326 }
1327
1328 if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1329 {
1330 UWORD32 u4_pad_wd, u4_pad_ht;
1331 u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1332 u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1333 u4_pad_ht = MB_SIZE;
1334 if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1335 u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1336
1337 ih264_pad_right_luma(
1338 ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1339 ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1340
1341 ih264_pad_right_chroma(
1342 ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1343 ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1344 }
1345
1346 /* pad bottom edge */
1347 if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1348 {
1349 ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1350 ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1351
1352 ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1353 ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1354 }
1355
1356
1357 /* packed mb coeff data */
1358 ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1359
1360 /* packed mb header data */
1361 ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1362
1363 /* slice index */
1364 ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1365
1366 /*********************************************************************/
1367 /* ih264e_init_quant_params() routine is called at the pic init level*/
1368 /* this would have initialized the qp. */
1369 /* TODO_LATER: currently it is assumed that quant params donot change*/
1370 /* across mb's. When they do calculate update ps_qp_params accordingly*/
1371 /*********************************************************************/
1372
1373 /* init mv buffer ptr */
1374 ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1375 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1376
1377 /* Init co-located mv buffer */
1378 ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1379 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1380
1381 if (i4_mb_y == 0)
1382 {
1383 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1384 }
1385 else
1386 {
1387 ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1388 ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1389 }
1390
1391 ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1392
1393 /* mb type */
1394 ps_proc->u4_mb_type = I16x16;
1395
1396 /* lambda */
1397 ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1398
1399 /* mb distortion */
1400 ps_proc->i4_mb_distortion = SHRT_MAX;
1401
1402 if (i4_mb_x == 0)
1403 {
1404 ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1405
1406 ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1407
1408 ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1409
1410 if (i4_mb_y == 0)
1411 {
1412 memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1413 }
1414 }
1415
1416 /* mb cost */
1417 ps_proc->i4_mb_cost = INT_MAX;
1418
1419 /**********************/
1420 /* init deblk context */
1421 /**********************/
1422 ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1423 /* deblk lags the current mb proc by 1 row */
1424 /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1425 /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1426 /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1427 ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1428
1429 /* buffer ptrs */
1430 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1431 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1432
1433 /* init deblk bs context */
1434 /* mb indices */
1435 ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1436 ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1437
1438 /* init n_mb_process context */
1439 ps_n_mb_ctxt->i4_mb_x = 0;
1440 ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1441 ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1442
1443 return IH264E_SUCCESS;
1444 }
1445
1446 /**
1447 *******************************************************************************
1448 *
1449 * @brief This function performs luma & chroma padding
1450 *
1451 * @par Description:
1452 *
1453 * @param[in] ps_proc
1454 * Process context corresponding to the job
1455 *
1456 * @param[in] pu1_curr_pic_luma
1457 * Pointer to luma buffer
1458 *
1459 * @param[in] pu1_curr_pic_chroma
1460 * Pointer to chroma buffer
1461 *
1462 * @param[in] i4_mb_x
1463 * mb index x
1464 *
1465 * @param[in] i4_mb_y
1466 * mb index y
1467 *
1468 * @param[in] i4_pad_ht
1469 * number of rows to be padded
1470 *
1471 * @returns error status
1472 *
1473 * @remarks none
1474 *
1475 *******************************************************************************
1476 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1477 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1478 UWORD8 *pu1_curr_pic_luma,
1479 UWORD8 *pu1_curr_pic_chroma,
1480 WORD32 i4_mb_x,
1481 WORD32 i4_mb_y,
1482 WORD32 i4_pad_ht)
1483 {
1484 /* codec context */
1485 codec_t *ps_codec = ps_proc->ps_codec;
1486
1487 /* strides */
1488 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1489
1490 if (i4_mb_x == 0)
1491 {
1492 /* padding left luma */
1493 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1494
1495 /* padding left chroma */
1496 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1497 }
1498 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1499 {
1500 /* padding right luma */
1501 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1502
1503 /* padding right chroma */
1504 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1505
1506 if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1507 {
1508 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1509 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1510
1511 /* padding bottom luma */
1512 ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1513
1514 /* padding bottom chroma */
1515 ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1516 }
1517 }
1518
1519 if (i4_mb_y == 0)
1520 {
1521 UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1522 UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1523 WORD32 wd = MB_SIZE;
1524
1525 if (i4_mb_x == 0)
1526 {
1527 pu1_rec_luma -= PAD_LEFT;
1528 pu1_rec_chroma -= PAD_LEFT;
1529
1530 wd += PAD_LEFT;
1531 }
1532 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1533 {
1534 wd += PAD_RIGHT;
1535 }
1536
1537 /* padding top luma */
1538 ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1539
1540 /* padding top chroma */
1541 ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1542 }
1543
1544 return IH264E_SUCCESS;
1545 }
1546
1547
1548
1549
1550 /**
1551 *******************************************************************************
1552 *
1553 * @brief This function performs deblocking, padding and halfpel generation for
1554 * 'n' MBs
1555 *
1556 * @par Description:
1557 *
1558 * @param[in] ps_proc
1559 * Process context corresponding to the job
1560 *
1561 * @param[in] pu1_curr_pic_luma
1562 * Current MB being processed(Luma)
1563 *
1564 * @param[in] pu1_curr_pic_chroma
1565 * Current MB being processed(Chroma)
1566 *
1567 * @param[in] i4_mb_x
1568 * Column value of current MB processed
1569 *
1570 * @param[in] i4_mb_y
1571 * Curent row processed
1572 *
1573 * @returns error status
1574 *
1575 * @remarks none
1576 *
1577 *******************************************************************************
1578 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1579 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1580 UWORD8 *pu1_curr_pic_luma,
1581 UWORD8 *pu1_curr_pic_chroma,
1582 WORD32 i4_mb_x,
1583 WORD32 i4_mb_y)
1584 {
1585 /* codec context */
1586 codec_t *ps_codec = ps_proc->ps_codec;
1587
1588 /* n_mb processing context */
1589 n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1590
1591 /* deblk context */
1592 deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1593
1594 /* strides */
1595 WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1596
1597 /* loop variables */
1598 WORD32 row, i, j, col;
1599
1600 /* Padding Width */
1601 UWORD32 u4_pad_wd;
1602
1603 /* deblk_map of the row being deblocked */
1604 UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1605
1606 /* deblk_map_previous row */
1607 UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1608
1609 WORD32 u4_pad_top = 0;
1610
1611 WORD32 u4_deblk_prev_row = 0;
1612
1613 /* Number of mbs to be processed */
1614 WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1615
1616 /* Number of mbs actually processed
1617 * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1618 WORD32 i4_n_mb_process_count = 0;
1619
1620 UWORD8 *pu1_pad_bottom_src = NULL;
1621
1622 UWORD8 *pu1_pad_src_luma = NULL;
1623 UWORD8 *pu1_pad_src_chroma = NULL;
1624
1625 if (ps_proc->u4_disable_deblock_level == 1)
1626 {
1627 /* If left most MB is processed, then pad left */
1628 if (i4_mb_x == 0)
1629 {
1630 /* padding left luma */
1631 ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1632
1633 /* padding left chroma */
1634 ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1635 }
1636 /*last col*/
1637 if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1638 {
1639 /* padding right luma */
1640 ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1641
1642 /* padding right chroma */
1643 ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1644 }
1645 }
1646
1647 if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1648 {
1649 /* if number of mb's to be processed are less than 'N', go back.
1650 * exception to the above clause is end of row */
1651 if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1652 {
1653 return IH264E_SUCCESS;
1654 }
1655 else
1656 {
1657 i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1658
1659 /* performing deblocking for required number of MBs */
1660 if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1661 {
1662 u4_deblk_prev_row = 1;
1663
1664 /* checking whether the top rows are deblocked */
1665 for (col = 0; col < i4_n_mb_process_count; col++)
1666 {
1667 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1668 }
1669
1670 /* checking whether the top right MB is deblocked */
1671 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1672 {
1673 u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1674 }
1675
1676 /* Top or Top right MBs not deblocked */
1677 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1678 {
1679 return IH264E_SUCCESS;
1680 }
1681
1682 for (row = 0; row < i4_n_mb_process_count; row++)
1683 {
1684 ih264e_deblock_mb(ps_proc, ps_deblk);
1685
1686 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1687
1688 if (ps_deblk->i4_mb_y > 0)
1689 {
1690 if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1691 {
1692 /* padding left luma */
1693 ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1694
1695 /* padding left chroma */
1696 ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1697 }
1698
1699 if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1700 {
1701 /* padding right luma */
1702 ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1703
1704 /* padding right chroma */
1705 ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1706 }
1707 }
1708 ps_deblk->i4_mb_x++;
1709
1710 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1711 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1712
1713 }
1714 }
1715 else if(i4_mb_y > 0)
1716 {
1717 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1718
1719 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1720 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1721 }
1722
1723 if (i4_mb_y == 2)
1724 {
1725 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1726 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1727
1728 if (ps_n_mb_ctxt->i4_mb_x == 0)
1729 {
1730 u4_pad_wd += PAD_LEFT;
1731 u4_pad_top = -PAD_LEFT;
1732 }
1733
1734 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1735 {
1736 u4_pad_wd += PAD_RIGHT;
1737 }
1738
1739 /* padding top luma */
1740 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1741
1742 /* padding top chroma */
1743 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1744 }
1745
1746 ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1747
1748 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1749 {
1750 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1751 {
1752 /* Bottom Padding is done in one stretch for the entire width */
1753 if (ps_proc->u4_disable_deblock_level != 1)
1754 {
1755 ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1756
1757 ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1758
1759 ps_n_mb_ctxt->i4_mb_x = 0;
1760 ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1761 ps_deblk->i4_mb_x = 0;
1762 ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1763
1764 /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1765 ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1766
1767 i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1768
1769 j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1770
1771 for (i = 0; i < j; i++)
1772 {
1773 for (col = 0; col < i4_n_mbs; col++)
1774 {
1775 ih264e_deblock_mb(ps_proc, ps_deblk);
1776
1777 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1778
1779 ps_deblk->i4_mb_x++;
1780 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1781 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1782 ps_n_mb_ctxt->i4_mb_x++;
1783 }
1784 }
1785
1786 for (col = 0; col < i4_n_mb_process_count; col++)
1787 {
1788 ih264e_deblock_mb(ps_proc, ps_deblk);
1789
1790 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1791
1792 ps_deblk->i4_mb_x++;
1793 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1794 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1795 ps_n_mb_ctxt->i4_mb_x++;
1796 }
1797
1798 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1799
1800 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1801
1802 /* padding left luma */
1803 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1804
1805 /* padding left chroma */
1806 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1807
1808 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1809 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1810
1811 /* padding left luma */
1812 ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1813
1814 /* padding left chroma */
1815 ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1816
1817 pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1818
1819 pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1820
1821 /* padding right luma */
1822 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1823
1824 /* padding right chroma */
1825 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1826
1827 pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1828 pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1829
1830 /* padding right luma */
1831 ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1832
1833 /* padding right chroma */
1834 ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1835
1836 }
1837
1838 /* In case height is less than 2 MBs pad top */
1839 if (ps_proc->i4_ht_mbs <= 2)
1840 {
1841 UWORD8 *pu1_pad_top_src;
1842 /* padding top luma */
1843 pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1844 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1845
1846 /* padding top chroma */
1847 pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1848 ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1849 }
1850
1851 /* padding bottom luma */
1852 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1853 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1854
1855 /* padding bottom chroma */
1856 pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1857 ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1858 }
1859 }
1860 }
1861 }
1862
1863 return IH264E_SUCCESS;
1864 }
1865
1866
1867 /**
1868 *******************************************************************************
1869 *
1870 * @brief This function performs luma & chroma core coding for a set of mb's.
1871 *
1872 * @par Description:
1873 * The mb to be coded is taken and is evaluated over a predefined set of modes
1874 * (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1875 * is selected and using intra/inter prediction filters, prediction is carried out.
1876 * The deviation between src and pred signal constitutes error signal. This error
1877 * signal is transformed (hierarchical transform if necessary) and quantized. The
1878 * quantized residue is packed in to entropy buffer for entropy coding. This is
1879 * repeated for all the mb's enlisted under the job.
1880 *
1881 * @param[in] ps_proc
1882 * Process context corresponding to the job
1883 *
1884 * @returns error status
1885 *
1886 * @remarks none
1887 *
1888 *******************************************************************************
1889 */
WORD32 ih264e_process(process_ctxt_t *ps_proc)
{
    /* error status, OR-ed together over every mb coded by this call */
    WORD32 error_status = IH264_SUCCESS;

    /* codec context */
    codec_t *ps_codec = ps_proc->ps_codec;

    /* cbp luma, chroma */
    UWORD32 u4_cbp_l, u4_cbp_c;

    /* width in mbs */
    WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;

    /* loop var, number of mbs enlisted under this job */
    WORD32 i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;

    /* bit mask of mb modes to evaluate, one bit per mode: (1 << mode) */
    UWORD32 u4_valid_modes = 0;

    /* intra gating threshold; only recomputed when inter gating is active,
     * otherwise it keeps its previous value (initially 0) */
    WORD32 i4_gate_threshold = 0;

    /* indices into the luma/chroma energy compaction function tables and
     * the intra flag of the current mb */
    WORD32 luma_idx, chroma_idx, is_intra;

    /* context set selector, used to pick the recon buffer descriptor */
    WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;

    /*
     * list of modes for evaluation
     * -------------------------------------------------------------------------
     * Note on enabling I4x4 and I16x16
     * At very low QP's the hadamard transform in I16x16 will push up the maximum
     * coeff value very high. CAVLC may not be able to represent the value and
     * hence the stream may not be decodable in some clips.
     * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
     */
    if (ps_proc->i4_slice_type == ISLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;

            /* enable intra 8x8 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
        }

        /* enable intra 4x4 : when configured, and always when qp <= 10 */
        u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

    }
    else if (ps_proc->i4_slice_type == PSLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
        }

        /* enable intra 4x4 : the configuration flag is honoured only for the
         * slowest preset; qp <= 10 enables it regardless */
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
        {
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        }
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

        /* enable inter P16x16 */
        u4_valid_modes |= (1 << P16x16);
    }
    else if (ps_proc->i4_slice_type == BSLICE)
    {
        if (ps_proc->u4_frame_qp > 10)
        {
            /* enable intra 16x16 */
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
        }

        /* enable intra 4x4 : same rule as for P slices */
        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
        {
            u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
        }
        u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;

        /* enable inter B16x16 */
        u4_valid_modes |= (1 << B16x16);
    }


    /* init entropy : start position and mb count, the count being clipped
     * to the mbs remaining in the row */
    ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
    ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
    ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);

    /* compute recon when :
     *   1. current frame is to be used as a reference
     *   2. dump recon for bit stream sanity check
     */
    ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
                                ps_codec->s_cfg.u4_enable_recon;

    /* Encode 'n' macroblocks,
     * 'n' being the number of mbs dictated by current proc ctxt */
    for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
    {
        /* since we have not yet found sad, we have not yet got min sad */
        /* we need to initialize these variables for each MB */
        /* TODO how to get the min sad into the codec */
        ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
        ps_proc->u4_min_sad_reached = 0;

        /* mb analysis */
        {
            /* raster scan index of the current mb */
            WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;

            /* force intra refresh ?
             * inter stays allowed when AIR is off, when the mb is already
             * flagged in pu1_is_intra_coded, or when its refresh map entry
             * differs from the current AIR picture count */
            WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
                            (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
                            (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);

            /* evaluate inter 16x16 modes */
            if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
            {
                /* compute nmb me : run once per group of u4_nmb_me mbs, on
                 * the first mb of the group */
                if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
                {
                    ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
                                                       i4_wd_mbs - ps_proc->i4_mb_x));
                }

                /* set pointers to ME data appropriately for other modules to use */
                {
                    /* position of the current mb inside its nmb group */
                    UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;

                    /* get the min sad condition for current mb */
                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;

                    ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
                    ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
                    ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);

                    ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
                    ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
                    /* NOTE(review): the next two assignments repeat the ones
                     * made at the top of this block from the same source */
                    ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
                    ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
                    ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;

                    /* get the best sub pel buffer */
                    ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
                    ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
                }
                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
            }
            else
            {
                /* Derive neighbor availability for the current macroblock */
                ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;

                ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
            }

            /*
             * If air says intra, we need to force the following code path to evaluate intra
             * The easy way is just to say that the inter cost is too much
             */
            if (!i4_air_enable_inter)
            {
                ps_proc->u4_min_sad_reached = 0;
                ps_proc->i4_mb_cost = INT_MAX;
                ps_proc->i4_mb_distortion = INT_MAX;
            }
            else if (ps_proc->u4_mb_type == PSKIP)
            {
                /* mb type is already PSKIP : bypass intra evaluation and
                 * core coding altogether */
                goto UPDATE_MB_INFO;
            }

            /* wait until the proc of [top + 1] mb is computed.
             * We wait till the proc dependencies are satisfied */
            if(ps_proc->i4_mb_y > 0)
            {
                /* proc map of the row above */
                UWORD8 *pu1_proc_map_top;

                pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);

                /* poll the map entry, yielding the cpu between polls, until
                 * it becomes non-zero */
                while (1)
                {
                    volatile UWORD8 *pu1_buf;
                    /* the column is derived from the loop counter i4_mb_idx,
                     * not from i4_mb_x */
                    WORD32 idx = i4_mb_idx + 1;

                    /* clamp to the last column of the row */
                    idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
                    pu1_buf = pu1_proc_map_top + idx;
                    if(*pu1_buf)
                        break;
                    ithread_yield();
                }
            }

            /* If we already have the minimum sad, there is no point in searching for sad again */
            if (ps_proc->u4_min_sad_reached == 0)
            {
                /* intra gating in inter slices */
                /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
                if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
                {
                    /* distortion of neighboring blocks : left, top,
                     * top right and top left */
                    WORD32 i4_distortion[4];

                    i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;

                    i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;

                    /* NOTE(review): reads entry [i4_mb_x + 1] of the top row;
                     * confirm the array has an entry past the last column */
                    i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;

                    i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;

                    /* threshold is the average of the four distortions */
                    i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;

                }


                /* If we are going to force intra we need to evaluate intra irrespective of gating */
                if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
                {
                    /* evaluate intra 4x4 modes */
                    if (u4_valid_modes & (1 << I4x4))
                    {
                        if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
                        {
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
                        }
                        else
                        {
                            ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
                        }
                    }

                    /* evaluate intra 16x16 modes */
                    if (u4_valid_modes & (1 << I16x16))
                    {
                        ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
                    }

                    /* evaluate intra 8x8 modes */
                    if (u4_valid_modes & (1 << I8x8))
                    {
                        ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
                    }

                }
            }
        }

        /* is intra */
        if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
        {
            /* intra : the luma function table is indexed by the mb type */
            luma_idx = ps_proc->u4_mb_type;
            chroma_idx = 0;
            is_intra = 1;

            /* evaluate chroma blocks for intra */
            ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
        }
        else
        {
            /* not intra : fixed table entries (luma 3, chroma 1) */
            luma_idx = 3;
            chroma_idx = 1;
            is_intra = 0;
        }
        ps_proc->u4_is_intra = is_intra;
        ps_proc->ps_pu->b1_intra_flag = is_intra;

        /* redo MV pred of neighbors in the case intra mb */
        /* TODO : currently called unconditionally, needs to be called only in the case of intra
         * to modify neighbors */
        if (ps_proc->i4_slice_type != ISLICE)
        {
            ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
        }

        /* Perform luma mb core coding */
        u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);

        /* Perform chroma mb core coding */
        u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);

        /* coded block pattern : chroma cbp above bit 4, luma cbp below */
        ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;

        /* non intra mbs may still be turned into skip / direct mbs */
        if (!ps_proc->u4_is_intra)
        {
            if (ps_proc->i4_slice_type == BSLICE)
            {
                /* BDIRECT when there is residue to code, BSKIP otherwise */
                if (ih264e_find_bskip_params(ps_proc, PRED_L0))
                {
                    ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
                }
            }
            else if(!ps_proc->u4_cbp)
            {
                /* PSKIP is considered only when cbp is zero */
                if (ih264e_find_pskip_params(ps_proc, PRED_L0))
                {
                    ps_proc->u4_mb_type = PSKIP;
                }
            }
        }

UPDATE_MB_INFO:

        /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
        ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);

        /**********************************************************************/
        /* if disable deblock level is '0' this implies enable deblocking for */
        /* all edges of all macroblocks with out any restrictions             */
        /*                                                                    */
        /* if disable deblock level is '1' this implies disable deblocking for*/
        /* all edges of all macroblocks with out any restrictions             */
        /*                                                                    */
        /* if disable deblock level is '2' this implies enable deblocking for */
        /* all edges of all macroblocks except edges overlapping with slice   */
        /* boundaries. This option is not currently supported by the encoder  */
        /* hence the slice map should be of no significance to perform debloc */
        /* king                                                               */
        /**********************************************************************/

        if (ps_proc->u4_compute_recon)
        {
            /* deblk context */
            /* src pointers */
            UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
            UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;

            /* src indices */
            UWORD32 i4_mb_x = ps_proc->i4_mb_x;
            UWORD32 i4_mb_y = ps_proc->i4_mb_y;

            /* compute blocking strength, unless deblocking is fully disabled */
            if (ps_proc->u4_disable_deblock_level != 1)
            {
                ih264e_compute_bs(ps_proc);
            }

            /* nmb deblocking and hpel and padding */
            ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
                                                  pu1_cur_pic_chroma, i4_mb_x,
                                                  i4_mb_y);
        }

        /* update the context after for coding next mb */
        error_status |= ih264e_update_proc_ctxt(ps_proc);

        /* Once the last row is processed, mark the buffer status appropriately */
        if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
        {
            /* Pointer to current picture buffer structure */
            pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;

            /* Pointer to current picture's mv buffer structure */
            mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;

            /* release the current picture's mv buffer and reference buffer
             * with the BUF_MGR_CODEC status in their buffer managers */
            error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);

            error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);

            /* publish the recon picture descriptor when recon is enabled */
            if (ps_codec->s_cfg.u4_enable_recon)
            {
                /* pic cnt */
                ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;

                /* rec buffers */
                ps_codec->as_rec_buf[ctxt_sel].s_pic_buf = *ps_proc->ps_cur_pic;

                /* is last? */
                ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;

                /* frame time stamp */
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
                ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
            }

        }
    }

    DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);

    return error_status;
}
2296
2297 /**
2298 *******************************************************************************
2299 *
2300 * @brief
2301 * Function to update rc context after encoding
2302 *
2303 * @par Description
2304 * This function updates the rate control context after the frame is encoded.
2305 * Number of bits consumed by the current frame, frame distortion, frame cost,
2306 * number of intra/inter mb's, ... are passed on to rate control context for
2307 * updating the rc model.
2308 *
2309 * @param[in] ps_codec
2310 * Handle to codec context
2311 *
2312 * @param[in] ctxt_sel
2313 * frame context selector
2314 *
2315 * @param[in] pic_cnt
2316 * pic count
2317 *
2318 * @returns i4_stuffing_byte
2319 * number of stuffing bytes (if necessary)
2320 *
2321 * @remarks
2322 *
2323 *******************************************************************************
2324 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2325 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2326 {
2327 /* proc set base idx */
2328 WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2329
2330 /* proc ctxt */
2331 process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2332
2333 /* frame qp */
2334 UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2335
2336 /* cbr rc return status */
2337 WORD32 i4_stuffing_byte = 0;
2338
2339 /* current frame stats */
2340 frame_info_t s_frame_info;
2341 picture_type_e rc_pic_type;
2342
2343 /* temp var */
2344 WORD32 i, j;
2345
2346 /********************************************************************/
2347 /* BEGIN INIT */
2348 /********************************************************************/
2349
2350 /* init frame info */
2351 irc_init_frame_info(&s_frame_info);
2352
2353 /* get frame info */
2354 for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2355 {
2356 /*****************************************************************/
2357 /* One frame can be encoded by max of u4_num_cores threads */
2358 /* Accumulating the num mbs, sad, qp and intra_mb_cost from */
2359 /* u4_num_cores threads */
2360 /*****************************************************************/
2361 for (j = 0; j< MAX_MB_TYPE; j++)
2362 {
2363 s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2364
2365 s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2366
2367 s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2368 }
2369
2370 s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2371
2372 s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2373
2374 /*****************************************************************/
2375 /* gather number of residue and header bits consumed by the frame*/
2376 /*****************************************************************/
2377 ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2378 }
2379
2380 /* get pic type */
2381 switch (ps_codec->pic_type)
2382 {
2383 case PIC_I:
2384 case PIC_IDR:
2385 rc_pic_type = I_PIC;
2386 break;
2387 case PIC_P:
2388 rc_pic_type = P_PIC;
2389 break;
2390 case PIC_B:
2391 rc_pic_type = B_PIC;
2392 break;
2393 default:
2394 assert(0);
2395 break;
2396 }
2397
2398 /* update rc lib with current frame stats */
2399 i4_stuffing_byte = ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2400 &(s_frame_info),
2401 ps_codec->s_rate_control.pps_pd_frm_rate,
2402 ps_codec->s_rate_control.pps_time_stamp,
2403 ps_codec->s_rate_control.pps_frame_time,
2404 (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2405 &rc_pic_type,
2406 i4_is_first_frm,
2407 &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2408 u1_frame_qp,
2409 &ps_codec->s_rate_control.num_intra_in_prev_frame,
2410 &ps_codec->s_rate_control.i4_avg_activity);
2411 return i4_stuffing_byte;
2412 }
2413
2414 /**
2415 *******************************************************************************
2416 *
2417 * @brief
2418 * entry point of a spawned encoder thread
2419 *
2420 * @par Description:
2421 * The encoder thread dequeues a proc/entropy job from the encoder queue and
2422 * calls necessary routines.
2423 *
2424 * @param[in] pv_proc
2425 * Process context corresponding to the thread
2426 *
2427 * @returns error status
2428 *
2429 * @remarks
2430 *
2431 *******************************************************************************
2432 */
ih264e_process_thread(void * pv_proc)2433 WORD32 ih264e_process_thread(void *pv_proc)
2434 {
2435 /* error status */
2436 IH264_ERROR_T ret = IH264_SUCCESS;
2437 WORD32 error_status = IH264_SUCCESS;
2438
2439 /* proc ctxt */
2440 process_ctxt_t *ps_proc = pv_proc;
2441
2442 /* codec ctxt */
2443 codec_t *ps_codec = ps_proc->ps_codec;
2444
2445 /* structure to represent a processing job entry */
2446 job_t s_job;
2447
2448 /* blocking call : entropy dequeue is non-blocking till all
2449 * the proc jobs are processed */
2450 WORD32 is_blocking = 0;
2451
2452 /* set affinity */
2453 ithread_set_affinity(ps_proc->i4_id);
2454
2455 while(1)
2456 {
2457 /* dequeue a job from the entropy queue */
2458 {
2459 int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2460
2461 /* codec context selector */
2462 WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2463
2464 volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2465
2466 /* have the lock */
2467 if (error == 0)
2468 {
2469 if (*pu4_buf == 0)
2470 {
2471 /* no entropy threads are active, try dequeuing a job from the entropy queue */
2472 ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2473 if (IH264_SUCCESS == ret)
2474 {
2475 *pu4_buf = 1;
2476 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2477 goto WORKER;
2478 }
2479 else if(is_blocking)
2480 {
2481 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2482 break;
2483 }
2484 }
2485 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2486 }
2487 }
2488
2489 /* dequeue a job from the process queue */
2490 ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2491 if (IH264_SUCCESS != ret)
2492 {
2493 if(ps_proc->i4_id)
2494 break;
2495 else
2496 {
2497 is_blocking = 1;
2498 continue;
2499 }
2500 }
2501
2502 WORKER:
2503 /* choose appropriate proc context based on proc_base_idx */
2504 ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2505
2506 switch (s_job.i4_cmd)
2507 {
2508 case CMD_PROCESS:
2509 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2510 ps_proc->i4_mb_x = s_job.i2_mb_x;
2511 ps_proc->i4_mb_y = s_job.i2_mb_y;
2512
2513 /* init process context */
2514 ih264e_init_proc_ctxt(ps_proc);
2515
2516 /* core code all mbs enlisted under the current job */
2517 error_status |= ih264e_process(ps_proc);
2518 break;
2519
2520 case CMD_ENTROPY:
2521 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2522 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2523 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2524
2525 /* init entropy */
2526 ih264e_init_entropy_ctxt(ps_proc);
2527
2528 /* entropy code all mbs enlisted under the current job */
2529 error_status |= ih264e_entropy(ps_proc);
2530 break;
2531
2532 default:
2533 error_status |= IH264_FAIL;
2534 break;
2535 }
2536 }
2537
2538 /* send error code */
2539 ps_proc->i4_error_code = error_status;
2540 return ret;
2541 }
2542