1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_process.c
25 *
26 * @brief
27 *  Contains functions for codec thread
28 *
29 * @author
30 *  Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 *  None
48 *
49 *******************************************************************************
50 */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63 
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115 
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
120 
121 /**
122 ******************************************************************************
123 *
124 *  @brief This function generates sps, pps set on request
125 *
126 *  @par   Description
127 *  When the encoder is set in header generation mode, the following function
128 *  is called. This generates sps and pps headers and returns the control back
129 *  to caller.
130 *
131 *  @param[in]    ps_codec
132 *  pointer to codec context
133 *
134 *  @return      success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140     /* choose between ping-pong process buffer set */
141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142 
143     /* entropy ctxt */
144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145 
146     /* Bitstream structure */
147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148 
149     /* sps */
150     sps_t *ps_sps = NULL;
151 
152     /* pps */
153     pps_t *ps_pps = NULL;
154 
155     /* output buff */
156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157 
158 
159     /********************************************************************/
160     /*      initialize the bit stream buffer                            */
161     /********************************************************************/
162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163 
164     /********************************************************************/
165     /*                    BEGIN HEADER GENERATION                       */
166     /********************************************************************/
167     /*ps_codec->i4_pps_id ++;*/
168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
169 
170     /*ps_codec->i4_sps_id ++;*/
171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
172 
173     /* populate sps header */
174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175     ih264e_populate_sps(ps_codec, ps_sps);
176 
177     /* populate pps header */
178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179     ih264e_populate_pps(ps_codec, ps_pps);
180 
181     ps_entropy->i4_error_code = IH264E_SUCCESS;
182 
183     /* generate sps */
184     ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
185 
186     /* generate pps */
187     ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
188 
189     /* queue output buffer */
190     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
191 
192     return ps_entropy->i4_error_code;
193 }
194 
195 /**
196 *******************************************************************************
197 *
198 * @brief   initialize entropy context.
199 *
200 * @par Description:
201 *  Before invoking the call to perform to entropy coding the entropy context
202 *  associated with the job needs to be initialized. This involves the start
203 *  mb address, end mb address, slice index and the pointer to location at
204 *  which the mb residue info and mb header info are packed.
205 *
206 * @param[in] ps_proc
207 *  Pointer to the current process context
208 *
209 * @returns error status
210 *
211 * @remarks none
212 *
213 *******************************************************************************
214 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)215 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
216 {
217     /* codec context */
218     codec_t *ps_codec = ps_proc->ps_codec;
219 
220     /* entropy ctxt */
221     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
222 
223     /* start address */
224     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
225 
226     /* end address */
227     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
228 
229     /* slice index */
230     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
231 
232     /* sof */
233     /* @ start of frame or start of a new slice, set sof flag */
234     if (ps_entropy->i4_mb_start_add == 0)
235     {
236         ps_entropy->i4_sof = 1;
237     }
238 
239     if (ps_entropy->i4_mb_x == 0)
240     {
241         /* packed mb coeff data */
242         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
243                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
244 
245         /* packed mb header data */
246         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
247                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
248     }
249 
250     return IH264E_SUCCESS;
251 }
252 
253 /**
254 *******************************************************************************
255 *
256 * @brief entry point for entropy coding
257 *
258 * @par Description
259 *  This function calls lower level functions to perform entropy coding for a
260 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
261 *  back the control, updates the ctxt and calls lower level functions again.
262 *  This process is repeated till all the rows or group of mb's (which ever is
263 *  minimum) are coded
264 *
265 * @param[in] ps_proc
266 *  process context
267 *
268 * @returns  error status
269 *
270 * @remarks
271 *
272 *******************************************************************************
273 */
274 
ih264e_entropy(process_ctxt_t * ps_proc)275 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
276 {
277     /* codec context */
278     codec_t *ps_codec = ps_proc->ps_codec;
279 
280     /* entropy context */
281     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
282 
283     /* cabac context */
284     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
285 
286     /* sps */
287     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
288 
289     /* pps */
290     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
291 
292     /* slice header */
293     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
294 
295     /* slice type */
296     WORD32 i4_slice_type = ps_proc->i4_slice_type;
297 
298     /* Bitstream structure */
299     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
300 
301     /* output buff */
302     out_buf_t s_out_buf;
303 
304     /* proc map */
305     UWORD8  *pu1_proc_map;
306 
307     /* entropy map */
308     UWORD8  *pu1_entropy_map_curr;
309 
310     /* proc base idx */
311     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
312 
313     /* temp var */
314     WORD32 i4_wd_mbs, i4_ht_mbs;
315     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
316     WORD32 bitstream_start_offset, bitstream_end_offset;
317     /********************************************************************/
318     /*                            BEGIN INIT                            */
319     /********************************************************************/
320 
321     /* entropy encode start address */
322     u4_mb_idx = ps_entropy->i4_mb_start_add;
323 
324     /* entropy encode end address */
325     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
326 
327     /* width in mbs */
328     i4_wd_mbs = ps_entropy->i4_wd_mbs;
329 
330     /* height in mbs */
331     i4_ht_mbs = ps_entropy->i4_ht_mbs;
332 
333     /* total mb cnt */
334     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
335 
336     /* proc map */
337     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
338 
339     /* entropy map */
340     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
341 
342     /********************************************************************/
343     /* @ start of frame / slice,                                        */
344     /*      initialize the output buffer,                               */
345     /*      initialize the bit stream buffer,                           */
346     /*      check if sps and pps headers have to be generated,          */
347     /*      populate and generate slice header                          */
348     /********************************************************************/
349     if (ps_entropy->i4_sof)
350     {
351         /********************************************************************/
352         /*      initialize the output buffer                                */
353         /********************************************************************/
354         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
355 
356         /* is last frame to encode */
357         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
358 
359         /* frame idx */
360         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
361         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
362 
363         /********************************************************************/
364         /*      initialize the bit stream buffer                            */
365         /********************************************************************/
366         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
367 
368         /********************************************************************/
369         /*                    BEGIN HEADER GENERATION                       */
370         /********************************************************************/
371         if (1 == ps_entropy->i4_gen_header)
372         {
373             /* generate sps */
374             ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps);
375 
376             /* generate pps */
377             ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
378 
379             /* reset i4_gen_header */
380             ps_entropy->i4_gen_header = 0;
381         }
382 
383         /* populate slice header */
384         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
385 
386         /* generate slice header */
387         ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
388                                                                   ps_pps, ps_sps);
389 
390         /* once start of frame / slice is done, you can reset it */
391         /* it is the responsibility of the caller to set this flag */
392         ps_entropy->i4_sof = 0;
393 
394         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
395         {
396             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
397             BITSTREAM_FLUSH(ps_bitstrm);
398             ih264e_init_cabac_ctxt(ps_entropy);
399         }
400     }
401 
402     /* begin entropy coding for the mb set */
403     while (u4_mb_idx < u4_mb_end_idx)
404     {
405         /* init ptrs/indices */
406         if (ps_entropy->i4_mb_x == i4_wd_mbs)
407         {
408             ps_entropy->i4_mb_y++;
409             ps_entropy->i4_mb_x = 0;
410 
411             /* packed mb coeff data */
412             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
413                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
414 
415             /* packed mb header data */
416             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
417                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
418 
419             /* proc map */
420             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
421 
422             /* entropy map */
423             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
424         }
425 
426         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
427         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
428         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
429 
430         /* wait until the curr mb is core coded */
431         /* The wait for curr mb to be core coded is essential when entropy is launched
432          * as a separate job
433          */
434         while (1)
435         {
436             volatile UWORD8 *pu1_buf1;
437             WORD32 idx = ps_entropy->i4_mb_x;
438 
439             pu1_buf1 = pu1_proc_map + idx;
440             if (*pu1_buf1)
441                 break;
442             ithread_yield();
443         }
444 
445 
446         /* write mb layer */
447         ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
448         /* Starting bitstream offset for header in bits */
449         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
450 
451         /* set entropy map */
452         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
453 
454         u4_mb_idx++;
455         ps_entropy->i4_mb_x++;
456         /* check for eof */
457         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
458         {
459             if (ps_entropy->i4_mb_x < i4_wd_mbs)
460             {
461                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
462             }
463         }
464 
465         if (ps_entropy->i4_mb_x == i4_wd_mbs)
466         {
467             /* if slices are enabled */
468             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
469             {
470                 /* current slice index */
471                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
472 
473                 /* slice map */
474                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
475 
476                 /* No need to open a slice at end of frame. The current slice can be closed at the time
477                  * of signaling eof flag.
478                  */
479                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
480                                                 != pu1_slice_idx[u4_mb_idx]))
481                 {
482                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
483                     { /* mb skip run */
484                         if ((i4_slice_type != ISLICE)
485                                         && *ps_entropy->pi4_mb_skip_run)
486                         {
487                             if (*ps_entropy->pi4_mb_skip_run)
488                             {
489                             PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
490                                 *ps_entropy->pi4_mb_skip_run = 0;
491                             }
492                         }
493                         /* put rbsp trailing bits for the previous slice */
494                                  ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
495                     }
496                     else
497                     {
498                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
499                     }
500 
501                     /* update slice header pointer */
502                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
503                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
504                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
505 
506                     /* populate slice header */
507                     ps_entropy->i4_mb_start_add = u4_mb_idx;
508                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
509                                                  ps_sps);
510 
511                     /* generate slice header */
512                     ps_entropy->i4_error_code |= ih264e_generate_slice_header(
513                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
514                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
515                     {
516                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
517                         BITSTREAM_FLUSH(ps_bitstrm);
518                         ih264e_init_cabac_ctxt(ps_entropy);
519                     }
520                 }
521                 else
522                 {
523                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
524                                     && u4_mb_idx != u4_mb_cnt)
525                     {
526                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
527                     }
528                 }
529             }
530             /* Dont execute any further instructions until store synchronization took place */
531             DATA_SYNC();
532         }
533 
534         /* Ending bitstream offset for header in bits */
535         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
536         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
537                         bitstream_end_offset - bitstream_start_offset;
538     }
539 
540     /* check for eof */
541     if (u4_mb_idx == u4_mb_cnt)
542     {
543         /* set end of frame flag */
544         ps_entropy->i4_eof = 1;
545     }
546     else
547     {
548         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
549                         && ps_codec->s_cfg.e_slice_mode
550                                         != IVE_SLICE_MODE_BLOCKS)
551         {
552             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
553         }
554     }
555 
556     if (ps_entropy->i4_eof)
557     {
558         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
559         {
560             /* mb skip run */
561             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
562             {
563                 if (*ps_entropy->pi4_mb_skip_run)
564                 {
565                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
566                                  ps_entropy->i4_error_code, "mb skip run");
567                     *ps_entropy->pi4_mb_skip_run = 0;
568                 }
569             }
570             /* put rbsp trailing bits */
571              ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
572         }
573         else
574         {
575             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
576         }
577 
578         /* update current frame stats to rc library */
579         {
580             /* number of bytes to stuff */
581             WORD32 i4_stuff_bytes;
582 
583             /* update */
584             i4_stuff_bytes = ih264e_update_rc_post_enc(
585                             ps_codec, ctxt_sel,
586                             (ps_proc->ps_codec->i4_poc == 0));
587 
588             /* cbr rc - house keeping */
589             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
590             {
591                 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
592             }
593             else if (i4_stuff_bytes)
594             {
595                 /* add filler nal units */
596                 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
597             }
598         }
599 
600         /*
601          *Frame number is to be incremented only if the current frame is a
602          * reference frame. After each successful frame encode, we increment
603          * frame number by 1
604          */
605         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
606                         && ps_codec->u4_is_curr_frm_ref)
607         {
608             ps_codec->i4_frame_num++;
609         }
610         /********************************************************************/
611         /*      signal the output                                           */
612         /********************************************************************/
613         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
614                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
615 
616         DEBUG("entropy status %x", ps_entropy->i4_error_code);
617     }
618 
619     /* allow threads to dequeue entropy jobs */
620     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
621 
622     return ps_entropy->i4_error_code;
623 }
624 
625 /**
626 *******************************************************************************
627 *
628 * @brief Packs header information of a mb in to a buffer
629 *
630 * @par Description:
631 *  After the deciding the mode info of a macroblock, the syntax elements
632 *  associated with the mb are packed and stored. The entropy thread unpacks
633 *  this buffer and generates the end bit stream.
634 *
635 * @param[in] ps_proc
636 *  Pointer to the current process context
637 *
638 * @returns error status
639 *
640 * @remarks none
641 *
642 *******************************************************************************
643 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)644 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
645 {
646     /* curr mb type */
647     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
648 
649     /* pack mb syntax layer of curr mb (used for entropy coding) */
650     if (u4_mb_type == I4x4)
651     {
652         /* pointer to mb header storage space */
653         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
654 
655         /* temp var */
656         WORD32 i4, byte;
657 
658         /* mb type plus mode */
659         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
660 
661         /* cbp */
662         *pu1_ptr++ = ps_proc->u4_cbp;
663 
664         /* mb qp delta */
665         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
666 
667         /* sub mb modes */
668         for (i4 = 0; i4 < 16; i4 ++)
669         {
670             byte = 0;
671 
672             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
673                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
674             {
675                 byte |= 1;
676             }
677             else
678             {
679 
680                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
681                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
682                 {
683                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
684                 }
685                 else
686                 {
687                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
688                 }
689             }
690 
691             i4++;
692 
693             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
694                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
695             {
696                 byte |= 16;
697             }
698             else
699             {
700 
701                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
702                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
703                 {
704                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
705                 }
706                 else
707                 {
708                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
709                 }
710             }
711 
712             *pu1_ptr++ = byte;
713         }
714 
715         /* end of mb layer */
716         ps_proc->pv_mb_header_data = pu1_ptr;
717     }
718     else if (u4_mb_type == I16x16)
719     {
720         /* pointer to mb header storage space */
721         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
722 
723         /* mb type plus mode */
724         *pu1_ptr++ = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
725 
726         /* cbp */
727         *pu1_ptr++ = ps_proc->u4_cbp;
728 
729         /* mb qp delta */
730         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
731 
732         /* end of mb layer */
733         ps_proc->pv_mb_header_data = pu1_ptr;
734     }
735     else if (u4_mb_type == P16x16)
736     {
737         /* pointer to mb header storage space */
738         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
739 
740         WORD16 *i2_mv_ptr;
741 
742         /* mb type plus mode */
743         *pu1_ptr++ = u4_mb_type;
744 
745         /* cbp */
746         *pu1_ptr++ = ps_proc->u4_cbp;
747 
748         /* mb qp delta */
749         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
750 
751         i2_mv_ptr = (WORD16 *)pu1_ptr;
752 
753         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
754 
755         *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
756 
757         /* end of mb layer */
758         ps_proc->pv_mb_header_data = i2_mv_ptr;
759     }
760     else if (u4_mb_type == PSKIP)
761     {
762         /* pointer to mb header storage space */
763         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
764 
765         /* mb type plus mode */
766         *pu1_ptr++ = u4_mb_type;
767 
768         /* end of mb layer */
769         ps_proc->pv_mb_header_data = pu1_ptr;
770     }
771     else if(u4_mb_type == B16x16)
772     {
773 
774         /* pointer to mb header storage space */
775         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
776 
777         WORD16 *i2_mv_ptr;
778 
779         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
780 
781         /* mb type plus mode */
782         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
783 
784         /* cbp */
785         *pu1_ptr++ = ps_proc->u4_cbp;
786 
787         /* mb qp delta */
788         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
789 
790         /* l0 & l1 me data */
791         i2_mv_ptr = (WORD16 *)pu1_ptr;
792 
793         if (u4_pred_mode != PRED_L1)
794         {
795             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
796                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
797 
798             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
799                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
800         }
801         if (u4_pred_mode != PRED_L0)
802         {
803             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
804                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
805 
806             *i2_mv_ptr++ = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
807                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
808         }
809 
810         /* end of mb layer */
811         ps_proc->pv_mb_header_data = i2_mv_ptr;
812 
813     }
814     else if(u4_mb_type == BDIRECT)
815     {
816         /* pointer to mb header storage space */
817         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
818 
819         /* mb type plus mode */
820         *pu1_ptr++ = u4_mb_type;
821 
822         /* cbp */
823         *pu1_ptr++ = ps_proc->u4_cbp;
824 
825         /* mb qp delta */
826         *pu1_ptr++ = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
827 
828         ps_proc->pv_mb_header_data = pu1_ptr;
829 
830     }
831     else if(u4_mb_type == BSKIP)
832     {
833         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
834 
835         /* pointer to mb header storage space */
836         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
837 
838         /* mb type plus mode */
839         *pu1_ptr++ = (u4_pred_mode << 4) + u4_mb_type;
840 
841         /* end of mb layer */
842         ps_proc->pv_mb_header_data = pu1_ptr;
843     }
844 
845     return IH264E_SUCCESS;
846 }
847 
848 /**
849 *******************************************************************************
850 *
851 * @brief   update process context after encoding an mb. This involves preserving
852 * the current mb information for later use, initialize the proc ctxt elements to
853 * encode next mb.
854 *
855 * @par Description:
856 *  This function performs house keeping tasks after encoding an mb.
857 *  After encoding an mb, various elements of the process context needs to be
858 *  updated to encode the next mb. For instance, the source, recon and reference
859 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
860 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
861 *  the qp changes the quant param structure needs to be updated. Also to encoding
862 *  the next mb, the current mb info is used as part of mode prediction or mv
863 *  prediction. Hence the current mb info has to preserved at top/top left/left
864 *  locations.
865 *
866 * @param[in] ps_proc
867 *  Pointer to the current process context
868 *
869 * @returns none
870 *
871 * @remarks none
872 *
873 *******************************************************************************
874 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)875 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
876 {
877     /* error status */
878     WORD32 error_status = IH264_SUCCESS;
879 
880     /* codec context */
881     codec_t *ps_codec = ps_proc->ps_codec;
882 
883     /* curr mb indices */
884     WORD32 i4_mb_x = ps_proc->i4_mb_x;
885     WORD32 i4_mb_y = ps_proc->i4_mb_y;
886 
887     /* mb syntax elements of neighbors */
888     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
889     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
890     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
891 
892     /* curr mb type */
893     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
894 
895     /* curr mb type */
896     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
897 
898     /* width in mbs */
899     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
900 
901     /*height in mbs*/
902     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
903 
904     /* proc map */
905     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
906 
907     /* deblk context */
908     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
909 
910     /* deblk bs context */
911     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
912 
913     /* top row motion vector info */
914     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
915 
916     /* top left mb motion vector */
917     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
918 
919     /* left mb motion vector */
920     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
921 
922     /* sub mb modes */
923     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
924 
925     /*************************************************************/
926     /* During MV prediction, when top right mb is not available, */
927     /* top left mb info. is used for prediction. Hence the curr  */
928     /* top, which will be top left for the next mb needs to be   */
929     /* preserved before updating it with curr mb info.           */
930     /*************************************************************/
931 
932     /* mb type, mb class, csbp */
933     *ps_top_left_syn = *ps_top_syn;
934 
935     if (ps_proc->i4_slice_type != ISLICE)
936     {
937         /*****************************************/
938         /* update top left with top info results */
939         /*****************************************/
940         /* mv */
941         *ps_top_left_mb_pu = *ps_top_row_pu;
942     }
943 
944     /*************************************************/
945     /* update top and left with curr mb info results */
946     /*************************************************/
947 
948     /* mb type */
949     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
950 
951     /* mb class */
952     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
953 
954     /* csbp */
955     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
956 
957     /* distortion */
958     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
959 
960     if (u4_is_intra)
961     {
962         /* mb / sub mb modes */
963         if (I16x16 == u4_mb_type)
964         {
965             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
966         }
967         else if (I4x4 == u4_mb_type)
968         {
969             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
970             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
971         }
972         else if (I8x8 == u4_mb_type)
973         {
974             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
975             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
976         }
977 
978         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
979         {
980             /* mv */
981             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
982         }
983 
984         *ps_proc->pu4_mb_pu_cnt = 1;
985     }
986     else
987     {
988         /* mv */
989         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990     }
991 
992     /*
993      * Mark that the MB has been coded intra
994      * So that future AIRs can skip it
995      */
996     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
997 
998     /**************************************************/
999     /* pack mb header info. for entropy coding        */
1000     /**************************************************/
1001     ih264e_pack_header_data(ps_proc);
1002 
1003     /* update previous mb qp */
1004     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1005 
1006     /* store qp */
1007     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1008 
1009     /*
1010      * We need to sync the cache to make sure that the nmv content of proc
1011      * is updated to cache properly
1012      */
1013     DATA_SYNC();
1014 
1015     /* Just before finishing the row, enqueue the job in to entropy queue.
1016      * The master thread depending on its convenience shall dequeue it and
1017      * performs entropy.
1018      *
1019      * WARN !! Placing this block post proc map update can cause queuing of
1020      * entropy jobs in out of order.
1021      */
1022     if (i4_mb_x == i4_wd_mbs - 1)
1023     {
1024         /* job structures */
1025         job_t s_job;
1026 
1027         /* job class */
1028         s_job.i4_cmd = CMD_ENTROPY;
1029 
1030         /* number of mbs to be processed in the current job */
1031         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1032 
1033         /* job start index x */
1034         s_job.i2_mb_x = 0;
1035 
1036         /* job start index y */
1037         s_job.i2_mb_y = ps_proc->i4_mb_y;
1038 
1039         /* proc base idx */
1040         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1041 
1042         /* queue the job */
1043         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1044 
1045         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1046             ih264_list_terminate(ps_codec->pv_entropy_jobq);
1047     }
1048 
1049     /* update proc map */
1050     pu1_proc_map[i4_mb_x] = 1;
1051 
1052     /**************************************************/
1053     /* update proc ctxt elements for encoding next mb */
1054     /**************************************************/
1055     /* update indices */
1056     i4_mb_x ++;
1057     ps_proc->i4_mb_x = i4_mb_x;
1058 
1059     if (ps_proc->i4_mb_x == i4_wd_mbs)
1060     {
1061         ps_proc->i4_mb_y++;
1062         ps_proc->i4_mb_x = 0;
1063     }
1064 
1065     /* update slice index */
1066     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1067 
1068     /* update buffers pointers */
1069     ps_proc->pu1_src_buf_luma += MB_SIZE;
1070     ps_proc->pu1_rec_buf_luma += MB_SIZE;
1071     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1072     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1073 
1074     /*
1075      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1076      * the stride per MB is MB_SIZE
1077      */
1078     ps_proc->pu1_src_buf_chroma += MB_SIZE;
1079     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1080     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1081     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1082 
1083 
1084 
1085     /* Reset cost, distortion params */
1086     ps_proc->i4_mb_cost = INT_MAX;
1087     ps_proc->i4_mb_distortion = SHRT_MAX;
1088 
1089     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1090 
1091     ps_proc->pu4_mb_pu_cnt += 1;
1092 
1093     /* Update colocated pu */
1094     if (ps_proc->i4_slice_type == BSLICE)
1095         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1096 
1097     /* deblk ctxts */
1098     if (ps_proc->u4_disable_deblock_level != 1)
1099     {
1100         /* indices */
1101         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1102         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1103 
1104 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1105         ps_deblk->i4_mb_x ++;
1106 
1107         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1108         /*
1109          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1110          * the stride per MB is MB_SIZE
1111          */
1112         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1113 #endif
1114     }
1115 
1116     return error_status;
1117 }
1118 
1119 /**
1120 *******************************************************************************
1121 *
1122 * @brief   initialize process context.
1123 *
1124 * @par Description:
1125 *  Before dispatching the current job to process thread, the process context
1126 *  associated with the job is initialized. Usually every job aims to encode one
1127 *  row of mb's. Basing on the row indices provided by the job, the process
1128 *  context's buffer ptrs, slice indices and other elements that are necessary
1129 *  during core-coding are initialized.
1130 *
1131 * @param[in] ps_proc
1132 *  Pointer to the current process context
1133 *
1134 * @returns error status
1135 *
1136 * @remarks none
1137 *
1138 *******************************************************************************
1139 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1140 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1141 {
1142     /* codec context */
1143     codec_t *ps_codec = ps_proc->ps_codec;
1144 
1145     /* nmb processing context*/
1146     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1147 
1148     /* indices */
1149     WORD32 i4_mb_x, i4_mb_y;
1150 
1151     /* strides */
1152     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1153     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1154     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1155 
1156     /* quant params */
1157     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1158 
1159     /* deblk ctxt */
1160     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1161 
1162     /* deblk bs context */
1163     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1164 
1165     /* Pointer to mv_buffer of current frame */
1166     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1167 
1168     /* Pointers for color space conversion */
1169     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1170 
1171     /* Pad the MB to support non standard sizes */
1172     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1173     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1174     UWORD16 u2_num_rows = MB_SIZE;
1175     WORD32 convert_uv_only;
1176 
1177     /********************************************************************/
1178     /*                            BEGIN INIT                            */
1179     /********************************************************************/
1180 
1181     i4_mb_x = ps_proc->i4_mb_x;
1182     i4_mb_y = ps_proc->i4_mb_y;
1183 
1184     /* Number of mbs processed in one loop of process function */
1185     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1186     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1187 
1188     /* init buffer pointers */
1189     convert_uv_only = 1;
1190     if (u4_pad_bottom_sz || u4_pad_right_sz ||
1191         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1192     {
1193         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1194             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1195         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1196         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1197         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1198         convert_uv_only = 0;
1199     }
1200     else
1201     {
1202         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1203         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1204     }
1205 
1206 
1207     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1208         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1209         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1210         u4_pad_bottom_sz || u4_pad_right_sz)
1211     {
1212         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1213             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1214             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1215 
1216         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1217         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1218     }
1219     else
1220     {
1221         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1222         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1223     }
1224 
1225     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1226     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1227 
1228     /* Tempral back and forward reference buffer */
1229     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1230     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1231     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1232     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1233 
1234     /*
1235      * Do color space conversion
1236      * NOTE : We assume there that the number of MB's to process will not span multiple rows
1237      */
1238     switch (ps_codec->s_cfg.e_inp_color_fmt)
1239     {
1240         case IV_YUV_420SP_UV:
1241         case IV_YUV_420SP_VU:
1242             /* In case of 420 semi-planar input, copy last few rows to intermediate
1243                buffer as chroma trans functions access one extra byte due to interleaved input.
1244                This data will be padded if required */
1245             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1246             {
1247                 WORD32 num_rows = MB_SIZE;
1248                 UWORD8 *pu1_src;
1249                 UWORD8 *pu1_dst;
1250                 WORD32 i;
1251                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1252                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1253 
1254                 pu1_dst = ps_proc->pu1_src_buf_luma;
1255 
1256                 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1257                 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1258                     if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1259                         num_rows = MB_SIZE - u4_pad_bottom_sz;
1260                     for (i = 0; i < num_rows; i++)
1261                     {
1262                         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1263                         pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1264                         pu1_dst += ps_proc->i4_src_strd;
1265                     }
1266                 }
1267                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1268                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1269                 pu1_dst = ps_proc->pu1_src_buf_chroma;
1270 
1271                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1272                  * due to interleaved input
1273                  */
1274                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1275                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1276                 else
1277                     num_rows = BLK8x8SIZE;
1278                 for (i = 0; i < num_rows; i++)
1279                 {
1280                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1281                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1282                     pu1_dst += ps_proc->i4_src_chroma_strd;
1283                 }
1284 
1285             }
1286             break;
1287 
1288         case IV_YUV_420P :
1289             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1290                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1291 
1292             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1293                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1294 
1295             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1296                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1297 
1298             ps_codec->pf_ih264e_conv_420p_to_420sp(
1299                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1300                             ps_proc->pu1_src_buf_luma,
1301                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
1302                             ps_codec->s_cfg.u4_disp_wd,
1303                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1304                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1305                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1306                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1307                             convert_uv_only);
1308             break;
1309 
1310         case IV_YUV_422ILE :
1311             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1312                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1313 
1314             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1315                             ps_proc->pu1_src_buf_luma,
1316                             ps_proc->pu1_src_buf_chroma,
1317                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1318                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1319                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1320                             ps_proc->i4_src_chroma_strd,
1321                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1322             break;
1323 
1324         default:
1325             break;
1326     }
1327 
1328     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1329     {
1330         UWORD32 u4_pad_wd, u4_pad_ht;
1331         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1332         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1333         u4_pad_ht = MB_SIZE;
1334         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1335             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1336 
1337         ih264_pad_right_luma(
1338                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1339                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1340 
1341         ih264_pad_right_chroma(
1342                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1343                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1344     }
1345 
1346     /* pad bottom edge */
1347     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1348     {
1349         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1350                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1351 
1352         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1353                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1354     }
1355 
1356 
1357     /* packed mb coeff data */
1358     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1359 
1360     /* packed mb header data */
1361     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1362 
1363     /* slice index */
1364     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1365 
1366     /*********************************************************************/
1367     /* ih264e_init_quant_params() routine is called at the pic init level*/
1368     /* this would have initialized the qp.                               */
1369     /* TODO_LATER: currently it is assumed that quant params donot change*/
1370     /* across mb's. When they do calculate update ps_qp_params accordingly*/
1371     /*********************************************************************/
1372 
1373     /* init mv buffer ptr */
1374     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1375                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1376 
1377     /* Init co-located mv buffer */
1378     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1379                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1380 
1381     if (i4_mb_y == 0)
1382     {
1383         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1384     }
1385     else
1386     {
1387         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1388                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1389     }
1390 
1391     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1392 
1393     /* mb type */
1394     ps_proc->u4_mb_type = I16x16;
1395 
1396     /* lambda */
1397     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1398 
1399     /* mb distortion */
1400     ps_proc->i4_mb_distortion = SHRT_MAX;
1401 
1402     if (i4_mb_x == 0)
1403     {
1404         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1405 
1406         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1407 
1408         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1409 
1410         if (i4_mb_y == 0)
1411         {
1412             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1413         }
1414     }
1415 
1416     /* mb cost */
1417     ps_proc->i4_mb_cost = INT_MAX;
1418 
1419     /**********************/
1420     /* init deblk context */
1421     /**********************/
1422     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1423     /* deblk lags the current mb proc by 1 row */
1424     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1425     /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1426     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1427     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1428 
1429     /* buffer ptrs */
1430     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1431     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1432 
1433     /* init deblk bs context */
1434     /* mb indices */
1435     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1436     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1437 
1438     /* init n_mb_process  context */
1439     ps_n_mb_ctxt->i4_mb_x = 0;
1440     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1441     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1442 
1443     return IH264E_SUCCESS;
1444 }
1445 
1446 /**
1447 *******************************************************************************
1448 *
1449 * @brief This function performs luma & chroma padding
1450 *
1451 * @par Description:
1452 *
1453 * @param[in] ps_proc
1454 *  Process context corresponding to the job
1455 *
1456 * @param[in] pu1_curr_pic_luma
1457 *  Pointer to luma buffer
1458 *
1459 * @param[in] pu1_curr_pic_chroma
1460 *  Pointer to chroma buffer
1461 *
1462 * @param[in] i4_mb_x
1463 *  mb index x
1464 *
1465 * @param[in] i4_mb_y
1466 *  mb index y
1467 *
1468 *  @param[in] i4_pad_ht
1469 *  number of rows to be padded
1470 *
1471 * @returns  error status
1472 *
1473 * @remarks none
1474 *
1475 *******************************************************************************
1476 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1477 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1478                                        UWORD8 *pu1_curr_pic_luma,
1479                                        UWORD8 *pu1_curr_pic_chroma,
1480                                        WORD32 i4_mb_x,
1481                                        WORD32 i4_mb_y,
1482                                        WORD32 i4_pad_ht)
1483 {
1484     /* codec context */
1485     codec_t *ps_codec = ps_proc->ps_codec;
1486 
1487     /* strides */
1488     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1489 
1490     if (i4_mb_x == 0)
1491     {
1492         /* padding left luma */
1493         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1494 
1495         /* padding left chroma */
1496         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1497     }
1498     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1499     {
1500         /* padding right luma */
1501         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1502 
1503         /* padding right chroma */
1504         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1505 
1506         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1507         {
1508             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1509             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1510 
1511             /* padding bottom luma */
1512             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1513 
1514             /* padding bottom chroma */
1515             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1516         }
1517     }
1518 
1519     if (i4_mb_y == 0)
1520     {
1521         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1522         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1523         WORD32 wd = MB_SIZE;
1524 
1525         if (i4_mb_x == 0)
1526         {
1527             pu1_rec_luma -= PAD_LEFT;
1528             pu1_rec_chroma -= PAD_LEFT;
1529 
1530             wd += PAD_LEFT;
1531         }
1532         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1533         {
1534             wd += PAD_RIGHT;
1535         }
1536 
1537         /* padding top luma */
1538         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1539 
1540         /* padding top chroma */
1541         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1542     }
1543 
1544     return IH264E_SUCCESS;
1545 }
1546 
1547 
1548 
1549 
1550 /**
1551 *******************************************************************************
1552 *
1553 * @brief This function performs deblocking, padding and halfpel generation for
1554 *  'n' MBs
1555 *
1556 * @par Description:
1557 *
1558 * @param[in] ps_proc
1559 *  Process context corresponding to the job
1560 *
1561 * @param[in] pu1_curr_pic_luma
1562 * Current MB being processed(Luma)
1563 *
1564 * @param[in] pu1_curr_pic_chroma
1565 * Current MB being processed(Chroma)
1566 *
1567 * @param[in] i4_mb_x
1568 * Column value of current MB processed
1569 *
1570 * @param[in] i4_mb_y
1571 * Current row processed
1572 *
1573 * @returns  error status
1574 *
1575 * @remarks none
1576 *
1577 *******************************************************************************
1578 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1579 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1580                                                      UWORD8 *pu1_curr_pic_luma,
1581                                                      UWORD8 *pu1_curr_pic_chroma,
1582                                                      WORD32 i4_mb_x,
1583                                                      WORD32 i4_mb_y)
1584 {
1585     /* codec context */
1586     codec_t *ps_codec = ps_proc->ps_codec;
1587 
1588     /* n_mb processing context */
1589     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1590 
1591     /* deblk context */
1592     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1593 
1594     /* strides */
1595     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1596 
1597     /* loop variables */
1598     WORD32 row, i, j, col;
1599 
1600     /* Padding Width */
1601     UWORD32 u4_pad_wd;
1602 
1603     /* deblk_map of the row being deblocked */
1604     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1605 
1606     /* deblk_map_previous row */
1607     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1608 
1609     WORD32 u4_pad_top = 0;
1610 
1611     WORD32 u4_deblk_prev_row = 0;
1612 
1613     /* Number of mbs to be processed */
1614     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1615 
1616     /* Number of mbs  actually processed
1617      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1618     WORD32 i4_n_mb_process_count = 0;
1619 
1620     UWORD8 *pu1_pad_bottom_src = NULL;
1621 
1622     UWORD8 *pu1_pad_src_luma = NULL;
1623     UWORD8 *pu1_pad_src_chroma = NULL;
1624 
1625     if (ps_proc->u4_disable_deblock_level == 1)
1626     {
1627         /* If left most MB is processed, then pad left */
1628         if (i4_mb_x == 0)
1629         {
1630             /* padding left luma */
1631             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1632 
1633             /* padding left chroma */
1634             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1635         }
1636         /*last col*/
1637         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1638         {
1639             /* padding right luma */
1640             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1641 
1642             /* padding right chroma */
1643             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1644         }
1645     }
1646 
1647     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1648     {
1649         /* if number of mb's to be processed are less than 'N', go back.
1650          * exception to the above clause is end of row */
1651         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1652         {
1653             return IH264E_SUCCESS;
1654         }
1655         else
1656         {
1657             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1658 
1659             /* performing deblocking for required number of MBs */
1660             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1661             {
1662                 u4_deblk_prev_row = 1;
1663 
1664                 /* checking whether the top rows are deblocked */
1665                 for (col = 0; col < i4_n_mb_process_count; col++)
1666                 {
1667                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1668                 }
1669 
1670                 /* checking whether the top right MB is deblocked */
1671                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1672                 {
1673                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1674                 }
1675 
1676                 /* Top or Top right MBs not deblocked */
1677                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1678                 {
1679                     return IH264E_SUCCESS;
1680                 }
1681 
1682                 for (row = 0; row < i4_n_mb_process_count; row++)
1683                 {
1684                     ih264e_deblock_mb(ps_proc, ps_deblk);
1685 
1686                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1687 
1688                     if (ps_deblk->i4_mb_y > 0)
1689                     {
1690                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1691                         {
1692                             /* padding left luma */
1693                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1694 
1695                             /* padding left chroma */
1696                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1697                         }
1698 
1699                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1700                         {
1701                             /* padding right luma */
1702                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1703 
1704                             /* padding right chroma */
1705                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1706                         }
1707                     }
1708                     ps_deblk->i4_mb_x++;
1709 
1710                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1711                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1712 
1713                 }
1714             }
1715             else if(i4_mb_y > 0)
1716             {
1717                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1718 
1719                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1720                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1721             }
1722 
1723             if (i4_mb_y == 2)
1724             {
1725                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1726                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1727 
1728                 if (ps_n_mb_ctxt->i4_mb_x == 0)
1729                 {
1730                     u4_pad_wd += PAD_LEFT;
1731                     u4_pad_top = -PAD_LEFT;
1732                 }
1733 
1734                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1735                 {
1736                     u4_pad_wd += PAD_RIGHT;
1737                 }
1738 
1739                 /* padding top luma */
1740                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1741 
1742                 /* padding top chroma */
1743                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1744             }
1745 
1746             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1747 
1748             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1749             {
1750                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1751                 {
1752                     /* Bottom Padding is done in one stretch for the entire width */
1753                     if (ps_proc->u4_disable_deblock_level != 1)
1754                     {
1755                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1756 
1757                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1758 
1759                         ps_n_mb_ctxt->i4_mb_x = 0;
1760                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1761                         ps_deblk->i4_mb_x = 0;
1762                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1763 
1764                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1765                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1766 
1767                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1768 
1769                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1770 
1771                         for (i = 0; i < j; i++)
1772                         {
1773                             for (col = 0; col < i4_n_mbs; col++)
1774                             {
1775                                 ih264e_deblock_mb(ps_proc, ps_deblk);
1776 
1777                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1778 
1779                                 ps_deblk->i4_mb_x++;
1780                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1781                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1782                                 ps_n_mb_ctxt->i4_mb_x++;
1783                             }
1784                         }
1785 
1786                         for (col = 0; col < i4_n_mb_process_count; col++)
1787                         {
1788                             ih264e_deblock_mb(ps_proc, ps_deblk);
1789 
1790                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1791 
1792                             ps_deblk->i4_mb_x++;
1793                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1794                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1795                             ps_n_mb_ctxt->i4_mb_x++;
1796                         }
1797 
1798                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1799 
1800                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1801 
1802                         /* padding left luma */
1803                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1804 
1805                         /* padding left chroma */
1806                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1807 
1808                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1809                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1810 
1811                         /* padding left luma */
1812                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1813 
1814                         /* padding left chroma */
1815                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1816 
1817                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1818 
1819                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1820 
1821                         /* padding right luma */
1822                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1823 
1824                         /* padding right chroma */
1825                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1826 
1827                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1828                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1829 
1830                         /* padding right luma */
1831                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1832 
1833                         /* padding right chroma */
1834                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1835 
1836                     }
1837 
1838                     /* In case height is less than 2 MBs pad top */
1839                     if (ps_proc->i4_ht_mbs <= 2)
1840                     {
1841                         UWORD8 *pu1_pad_top_src;
1842                         /* padding top luma */
1843                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1844                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1845 
1846                         /* padding top chroma */
1847                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1848                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1849                     }
1850 
1851                     /* padding bottom luma */
1852                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1853                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1854 
1855                     /* padding bottom chroma */
1856                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1857                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1858                 }
1859             }
1860         }
1861     }
1862 
1863     return IH264E_SUCCESS;
1864 }
1865 
1866 
1867 /**
1868 *******************************************************************************
1869 *
1870 * @brief This function performs luma & chroma core coding for a set of mb's.
1871 *
1872 * @par Description:
1873 *  The mb to be coded is taken and is evaluated over a predefined set of modes
1874 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1875 *  is selected and using intra/inter prediction filters, prediction is carried out.
1876 *  The deviation between src and pred signal constitutes error signal. This error
1877 *  signal is transformed (hierarchical transform if necessary) and quantized. The
1878 *  quantized residue is packed in to entropy buffer for entropy coding. This is
1879 *  repeated for all the mb's enlisted under the job.
1880 *
1881 * @param[in] ps_proc
1882 *  Process context corresponding to the job
1883 *
1884 * @returns  error status
1885 *
1886 * @remarks none
1887 *
1888 *******************************************************************************
1889 */
ih264e_process(process_ctxt_t * ps_proc)1890 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1891 {
1892     /* error status */
1893     WORD32 error_status = IH264_SUCCESS;
1894 
1895     /* codec context */
1896     codec_t *ps_codec = ps_proc->ps_codec;
1897 
1898     /* cbp luma, chroma */
1899     UWORD32 u4_cbp_l, u4_cbp_c;
1900 
1901     /* width in mbs */
1902     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1903 
1904     /* loop var */
1905     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1906 
1907     /* valid modes */
1908     UWORD32 u4_valid_modes = 0;
1909 
1910     /* gate threshold */
1911     WORD32 i4_gate_threshold = 0;
1912 
1913     /* is intra */
1914     WORD32 luma_idx, chroma_idx, is_intra;
1915 
1916     /* temp variables */
1917     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1918 
1919     /*
1920      * list of modes for evaluation
1921      * -------------------------------------------------------------------------
1922      * Note on enabling I4x4 and I16x16
1923      * At very low QP's the hadamard transform in I16x16 will push up the maximum
1924      * coeff value very high. CAVLC may not be able to represent the value and
1925      * hence the stream may not be decodable in some clips.
1926      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1927      */
1928     if (ps_proc->i4_slice_type == ISLICE)
1929     {
1930         if (ps_proc->u4_frame_qp > 10)
1931         {
1932             /* enable intra 16x16 */
1933             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1934 
1935             /* enable intra 8x8 */
1936             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1937         }
1938 
1939         /* enable intra 4x4 */
1940         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1941         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1942 
1943     }
1944     else if (ps_proc->i4_slice_type == PSLICE)
1945     {
1946         if (ps_proc->u4_frame_qp > 10)
1947         {
1948             /* enable intra 16x16 */
1949             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1950         }
1951 
1952         /* enable intra 4x4 */
1953         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1954         {
1955             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1956         }
1957         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1958 
1959         /* enable inter P16x16 */
1960         u4_valid_modes |= (1 << P16x16);
1961     }
1962     else if (ps_proc->i4_slice_type == BSLICE)
1963     {
1964         if (ps_proc->u4_frame_qp > 10)
1965         {
1966             /* enable intra 16x16 */
1967             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1968         }
1969 
1970         /* enable intra 4x4 */
1971         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1972         {
1973             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1974         }
1975         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1976 
1977         /* enable inter B16x16 */
1978         u4_valid_modes |= (1 << B16x16);
1979     }
1980 
1981 
1982     /* init entropy */
1983     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1984     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1985     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1986 
1987     /* compute recon when :
1988      *   1. current frame is to be used as a reference
1989      *   2. dump recon for bit stream sanity check
1990      */
1991     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
1992                                 ps_codec->s_cfg.u4_enable_recon;
1993 
1994     /* Encode 'n' macroblocks,
1995      * 'n' being the number of mbs dictated by current proc ctxt */
1996     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
1997     {
1998         /* since we have not yet found sad, we have not yet got min sad */
1999         /* we need to initialize these variables for each MB */
2000         /* TODO how to get the min sad into the codec */
2001         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2002         ps_proc->u4_min_sad_reached = 0;
2003 
2004         /* mb analysis */
2005         {
2006             /* temp var */
2007             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2008 
2009             /* force intra refresh ? */
2010             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2011                             (ps_proc->pu1_is_intra_coded[i4_mb_id] != 0) ||
2012                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2013 
2014             /* evaluate inter 16x16 modes */
2015             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2016             {
2017                 /* compute nmb me */
2018                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2019                 {
2020                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2021                                                        i4_wd_mbs - ps_proc->i4_mb_x));
2022                 }
2023 
2024                 /* set pointers to ME data appropriately for other modules to use */
2025                 {
2026                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2027 
2028                     /* get the min sad condition for current mb */
2029                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2030                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2031 
2032                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2033                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2034                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2035 
2036                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2037                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2038                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2039                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2040                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2041 
2042                     /* get the best sub pel buffer */
2043                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2044                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2045                 }
2046                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2047             }
2048             else
2049             {
2050                 /* Derive neighbor availability for the current macroblock */
2051                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2052 
2053                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2054             }
2055 
2056             /*
2057              * If air says intra, we need to force the following code path to evaluate intra
2058              * The easy way is just to say that the inter cost is too much
2059              */
2060             if (!i4_air_enable_inter)
2061             {
2062                 ps_proc->u4_min_sad_reached = 0;
2063                 ps_proc->i4_mb_cost = INT_MAX;
2064                 ps_proc->i4_mb_distortion = INT_MAX;
2065             }
2066             else if (ps_proc->u4_mb_type == PSKIP)
2067             {
2068                 goto UPDATE_MB_INFO;
2069             }
2070 
2071             /* wait until the proc of [top + 1] mb is computed.
2072              * We wait till the proc dependencies are satisfied */
2073              if(ps_proc->i4_mb_y > 0)
2074              {
2075                 /* proc map */
2076                 UWORD8  *pu1_proc_map_top;
2077 
2078                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2079 
2080                 while (1)
2081                 {
2082                     volatile UWORD8 *pu1_buf;
2083                     WORD32 idx = i4_mb_idx + 1;
2084 
2085                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2086                     pu1_buf =  pu1_proc_map_top + idx;
2087                     if(*pu1_buf)
2088                         break;
2089                     ithread_yield();
2090                 }
2091             }
2092 
2093             /* If we already have the minimum sad, there is no point in searching for sad again */
2094             if (ps_proc->u4_min_sad_reached == 0)
2095             {
2096                 /* intra gating in inter slices */
2097                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2098                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2099                 {
2100                     /* distortion of neighboring blocks */
2101                     WORD32 i4_distortion[4];
2102 
2103                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2104 
2105                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2106 
2107                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2108 
2109                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2110 
2111                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2112 
2113                 }
2114 
2115 
2116                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2117                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2118                 {
2119                     /* evaluate intra 4x4 modes */
2120                     if (u4_valid_modes & (1 << I4x4))
2121                     {
2122                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2123                         {
2124                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2125                         }
2126                         else
2127                         {
2128                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2129                         }
2130                     }
2131 
2132                     /* evaluate intra 16x16 modes */
2133                     if (u4_valid_modes & (1 << I16x16))
2134                     {
2135                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2136                     }
2137 
2138                     /* evaluate intra 8x8 modes */
2139                     if (u4_valid_modes & (1 << I8x8))
2140                     {
2141                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2142                     }
2143 
2144                 }
2145         }
2146      }
2147 
2148         /* is intra */
2149         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2150         {
2151             luma_idx = ps_proc->u4_mb_type;
2152             chroma_idx = 0;
2153             is_intra = 1;
2154 
2155             /* evaluate chroma blocks for intra */
2156             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2157         }
2158         else
2159         {
2160             luma_idx = 3;
2161             chroma_idx = 1;
2162             is_intra = 0;
2163         }
2164         ps_proc->u4_is_intra = is_intra;
2165         ps_proc->ps_pu->b1_intra_flag = is_intra;
2166 
2167         /* redo MV pred of neighbors in the case intra mb */
2168         /* TODO : currently called unconditionally, needs to be called only in the case of intra
2169          * to modify neighbors */
2170         if (ps_proc->i4_slice_type != ISLICE)
2171         {
2172             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2173         }
2174 
2175         /* Perform luma mb core coding */
2176         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2177 
2178         /* Perform luma mb core coding */
2179         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2180 
2181         /* coded block pattern */
2182         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2183 
2184         if (!ps_proc->u4_is_intra)
2185         {
2186             if (ps_proc->i4_slice_type == BSLICE)
2187             {
2188                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2189                 {
2190                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2191                 }
2192             }
2193             else if(!ps_proc->u4_cbp)
2194             {
2195                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2196                 {
2197                     ps_proc->u4_mb_type = PSKIP;
2198                 }
2199             }
2200         }
2201 
2202 UPDATE_MB_INFO:
2203 
2204         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2205         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2206 
2207         /**********************************************************************/
2208         /* if disable deblock level is '0' this implies enable deblocking for */
2209         /* all edges of all macroblocks with out any restrictions             */
2210         /*                                                                    */
2211         /* if disable deblock level is '1' this implies disable deblocking for*/
2212         /* all edges of all macroblocks with out any restrictions             */
2213         /*                                                                    */
2214         /* if disable deblock level is '2' this implies enable deblocking for */
2215         /* all edges of all macroblocks except edges overlapping with slice   */
2216         /* boundaries. This option is not currently supported by the encoder  */
2217         /* hence the slice map should be of no significance to perform debloc */
2218         /* king                                                               */
2219         /**********************************************************************/
2220 
2221         if (ps_proc->u4_compute_recon)
2222         {
2223             /* deblk context */
2224             /* src pointers */
2225             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2226             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2227 
2228             /* src indices */
2229             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2230             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2231 
2232             /* compute blocking strength */
2233             if (ps_proc->u4_disable_deblock_level != 1)
2234             {
2235                 ih264e_compute_bs(ps_proc);
2236             }
2237 
2238             /* nmb deblocking and hpel and padding */
2239             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2240                                                   pu1_cur_pic_chroma, i4_mb_x,
2241                                                   i4_mb_y);
2242         }
2243 
2244         /* update the context after for coding next mb */
2245         error_status |= ih264e_update_proc_ctxt(ps_proc);
2246 
2247         /* Once the last row is processed, mark the buffer status appropriately */
2248         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2249         {
2250             /* Pointer to current picture buffer structure */
2251             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2252 
2253             /* Pointer to current picture's mv buffer structure */
2254             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2255 
2256             /**********************************************************************/
2257             /* if disable deblock level is '0' this implies enable deblocking for */
2258             /* all edges of all macroblocks with out any restrictions             */
2259             /*                                                                    */
2260             /* if disable deblock level is '1' this implies disable deblocking for*/
2261             /* all edges of all macroblocks with out any restrictions             */
2262             /*                                                                    */
2263             /* if disable deblock level is '2' this implies enable deblocking for */
2264             /* all edges of all macroblocks except edges overlapping with slice   */
2265             /* boundaries. This option is not currently supported by the encoder  */
2266             /* hence the slice map should be of no significance to perform debloc */
2267             /* king                                                               */
2268             /**********************************************************************/
2269             error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2270 
2271             error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2272 
2273             if (ps_codec->s_cfg.u4_enable_recon)
2274             {
2275                 /* pic cnt */
2276                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2277 
2278                 /* rec buffers */
2279                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2280 
2281                 /* is last? */
2282                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2283 
2284                 /* frame time stamp */
2285                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2286                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2287             }
2288 
2289         }
2290     }
2291 
2292     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2293 
2294     return error_status;
2295 }
2296 
2297 /**
2298 *******************************************************************************
2299 *
2300 * @brief
2301 *  Function to update rc context after encoding
2302 *
2303 * @par   Description
2304 *  This function updates the rate control context after the frame is encoded.
2305 *  Number of bits consumed by the current frame, frame distortion, frame cost,
2306 *  number of intra/inter mb's, ... are passed on to rate control context for
2307 *  updating the rc model.
2308 *
2309 * @param[in] ps_codec
2310 *  Handle to codec context
2311 *
2312 * @param[in] ctxt_sel
2313 *  frame context selector
2314 *
2315 * @param[in] pic_cnt
2316 *  pic count
2317 *
2318 * @returns i4_stuffing_byte
2319 *  number of stuffing bytes (if necessary)
2320 *
2321 * @remarks
2322 *
2323 *******************************************************************************
2324 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2325 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2326 {
2327     /* proc set base idx */
2328     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2329 
2330     /* proc ctxt */
2331     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2332 
2333     /* frame qp */
2334     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2335 
2336     /* cbr rc return status */
2337     WORD32 i4_stuffing_byte = 0;
2338 
2339     /* current frame stats */
2340     frame_info_t s_frame_info;
2341     picture_type_e rc_pic_type;
2342 
2343     /* temp var */
2344     WORD32 i, j;
2345 
2346     /********************************************************************/
2347     /*                            BEGIN INIT                            */
2348     /********************************************************************/
2349 
2350     /* init frame info */
2351     irc_init_frame_info(&s_frame_info);
2352 
2353     /* get frame info */
2354     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2355     {
2356         /*****************************************************************/
2357         /* One frame can be encoded by max of u4_num_cores threads       */
2358         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2359         /* u4_num_cores threads                                          */
2360         /*****************************************************************/
2361         for (j = 0; j< MAX_MB_TYPE; j++)
2362         {
2363             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2364 
2365             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2366 
2367             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2368         }
2369 
2370         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2371 
2372         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2373 
2374         /*****************************************************************/
2375         /* gather number of residue and header bits consumed by the frame*/
2376         /*****************************************************************/
2377         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2378     }
2379 
2380     /* get pic type */
2381     switch (ps_codec->pic_type)
2382     {
2383         case PIC_I:
2384         case PIC_IDR:
2385             rc_pic_type = I_PIC;
2386             break;
2387         case PIC_P:
2388             rc_pic_type = P_PIC;
2389             break;
2390         case PIC_B:
2391             rc_pic_type = B_PIC;
2392             break;
2393         default:
2394             assert(0);
2395             break;
2396     }
2397 
2398     /* update rc lib with current frame stats */
2399     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2400                                           &(s_frame_info),
2401                                           ps_codec->s_rate_control.pps_pd_frm_rate,
2402                                           ps_codec->s_rate_control.pps_time_stamp,
2403                                           ps_codec->s_rate_control.pps_frame_time,
2404                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2405                                           &rc_pic_type,
2406                                           i4_is_first_frm,
2407                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2408                                           u1_frame_qp,
2409                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
2410                                           &ps_codec->s_rate_control.i4_avg_activity);
2411     return i4_stuffing_byte;
2412 }
2413 
2414 /**
2415 *******************************************************************************
2416 *
2417 * @brief
2418 *  entry point of a spawned encoder thread
2419 *
2420 * @par Description:
2421 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
2422 *  calls necessary routines.
2423 *
2424 * @param[in] pv_proc
2425 *  Process context corresponding to the thread
2426 *
2427 * @returns  error status
2428 *
2429 * @remarks
2430 *
2431 *******************************************************************************
2432 */
ih264e_process_thread(void * pv_proc)2433 WORD32 ih264e_process_thread(void *pv_proc)
2434 {
2435     /* error status */
2436     IH264_ERROR_T ret = IH264_SUCCESS;
2437     WORD32 error_status = IH264_SUCCESS;
2438 
2439     /* proc ctxt */
2440     process_ctxt_t *ps_proc = pv_proc;
2441 
2442     /* codec ctxt */
2443     codec_t *ps_codec = ps_proc->ps_codec;
2444 
2445     /* structure to represent a processing job entry */
2446     job_t s_job;
2447 
2448     /* blocking call : entropy dequeue is non-blocking till all
2449      * the proc jobs are processed */
2450     WORD32 is_blocking = 0;
2451 
2452     /* set affinity */
2453     ithread_set_affinity(ps_proc->i4_id);
2454 
2455     while(1)
2456     {
2457         /* dequeue a job from the entropy queue */
2458         {
2459             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2460 
2461             /* codec context selector */
2462             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2463 
2464             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2465 
2466             /* have the lock */
2467             if (error == 0)
2468             {
2469                 if (*pu4_buf == 0)
2470                 {
2471                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
2472                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2473                     if (IH264_SUCCESS == ret)
2474                     {
2475                         *pu4_buf = 1;
2476                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2477                         goto WORKER;
2478                     }
2479                     else if(is_blocking)
2480                     {
2481                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2482                         break;
2483                     }
2484                 }
2485                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2486             }
2487         }
2488 
2489         /* dequeue a job from the process queue */
2490         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2491         if (IH264_SUCCESS != ret)
2492         {
2493             if(ps_proc->i4_id)
2494                 break;
2495             else
2496             {
2497                 is_blocking = 1;
2498                 continue;
2499             }
2500         }
2501 
2502 WORKER:
2503         /* choose appropriate proc context based on proc_base_idx */
2504         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2505 
2506         switch (s_job.i4_cmd)
2507         {
2508             case CMD_PROCESS:
2509                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2510                 ps_proc->i4_mb_x = s_job.i2_mb_x;
2511                 ps_proc->i4_mb_y = s_job.i2_mb_y;
2512 
2513                 /* init process context */
2514                 ih264e_init_proc_ctxt(ps_proc);
2515 
2516                 /* core code all mbs enlisted under the current job */
2517                 error_status |= ih264e_process(ps_proc);
2518                 break;
2519 
2520             case CMD_ENTROPY:
2521                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2522                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2523                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2524 
2525                 /* init entropy */
2526                 ih264e_init_entropy_ctxt(ps_proc);
2527 
2528                 /* entropy code all mbs enlisted under the current job */
2529                 error_status |= ih264e_entropy(ps_proc);
2530                 break;
2531 
2532             default:
2533                 error_status |= IH264_FAIL;
2534                 break;
2535         }
2536     }
2537 
2538     /* send error code */
2539     ps_proc->i4_error_code = error_status;
2540     return ret;
2541 }
2542