1 /******************************************************************************
2  *
3  * Copyright (C) 2015 The Android Open Source Project
4  *
5  * Licensed under the Apache License, Version 2.0 (the "License");
6  * you may not use this file except in compliance with the License.
7  * You may obtain a copy of the License at:
8  *
9  * http://www.apache.org/licenses/LICENSE-2.0
10  *
11  * Unless required by applicable law or agreed to in writing, software
12  * distributed under the License is distributed on an "AS IS" BASIS,
13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  *
17  *****************************************************************************
18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
19 */
20 
21 /**
22 *******************************************************************************
23 * @file
24 *  ih264e_process.c
25 *
26 * @brief
27 *  Contains functions for codec thread
28 *
29 * @author
30 *  Harish
31 *
32 * @par List of Functions:
33 * - ih264e_generate_sps_pps()
34 * - ih264e_init_entropy_ctxt()
35 * - ih264e_entropy()
36 * - ih264e_pack_header_data()
37 * - ih264e_update_proc_ctxt()
38 * - ih264e_init_proc_ctxt()
39 * - ih264e_pad_recon_buffer()
40 * - ih264e_dblk_pad_hpel_processing_n_mbs()
41 * - ih264e_process()
42 * - ih264e_set_rc_pic_params()
43 * - ih264e_update_rc_post_enc()
44 * - ih264e_process_thread()
45 *
46 * @remarks
47 *  None
48 *
49 *******************************************************************************
50 */
51 
52 /*****************************************************************************/
53 /* File Includes                                                             */
54 /*****************************************************************************/
55 
56 /* System include files */
57 #include <stdio.h>
58 #include <stddef.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #include <limits.h>
62 #include <assert.h>
63 
64 /* User include files */
65 #include "ih264_typedefs.h"
66 #include "iv2.h"
67 #include "ive2.h"
68 #include "ih264_defs.h"
69 #include "ih264_debug.h"
70 #include "ime_distortion_metrics.h"
71 #include "ime_defs.h"
72 #include "ime_structs.h"
73 #include "ih264_error.h"
74 #include "ih264_structs.h"
75 #include "ih264_trans_quant_itrans_iquant.h"
76 #include "ih264_inter_pred_filters.h"
77 #include "ih264_mem_fns.h"
78 #include "ih264_padding.h"
79 #include "ih264_intra_pred_filters.h"
80 #include "ih264_deblk_edge_filters.h"
81 #include "ih264_cabac_tables.h"
82 #include "ih264_platform_macros.h"
83 #include "ih264_macros.h"
84 #include "ih264_buf_mgr.h"
85 #include "ih264e_error.h"
86 #include "ih264e_bitstream.h"
87 #include "ih264_common_tables.h"
88 #include "ih264_list.h"
89 #include "ih264e_defs.h"
90 #include "irc_cntrl_param.h"
91 #include "irc_frame_info_collector.h"
92 #include "ih264e_rate_control.h"
93 #include "ih264e_cabac_structs.h"
94 #include "ih264e_structs.h"
95 #include "ih264e_cabac.h"
96 #include "ih264e_process.h"
97 #include "ithread.h"
98 #include "ih264e_intra_modes_eval.h"
99 #include "ih264e_encode_header.h"
100 #include "ih264e_globals.h"
101 #include "ih264e_config.h"
102 #include "ih264e_trace.h"
103 #include "ih264e_statistics.h"
104 #include "ih264_cavlc_tables.h"
105 #include "ih264e_cavlc.h"
106 #include "ih264e_deblk.h"
107 #include "ih264e_me.h"
108 #include "ih264e_debug.h"
109 #include "ih264e_master.h"
110 #include "ih264e_utils.h"
111 #include "irc_mem_req_and_acq.h"
112 #include "irc_rate_control_api.h"
113 #include "ih264e_platform_macros.h"
114 #include "ime_statistics.h"
115 
116 
117 /*****************************************************************************/
118 /* Function Definitions                                                      */
119 /*****************************************************************************/
120 
121 /**
122 ******************************************************************************
123 *
124 *  @brief This function generates sps, pps set on request
125 *
126 *  @par   Description
127 *  When the encoder is set in header generation mode, the following function
128 *  is called. This generates sps and pps headers and returns the control back
129 *  to caller.
130 *
131 *  @param[in]    ps_codec
132 *  pointer to codec context
133 *
134 *  @return      success or failure error code
135 *
136 ******************************************************************************
137 */
ih264e_generate_sps_pps(codec_t * ps_codec)138 IH264E_ERROR_T ih264e_generate_sps_pps(codec_t *ps_codec)
139 {
140     /* choose between ping-pong process buffer set */
141     WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
142 
143     /* entropy ctxt */
144     entropy_ctxt_t *ps_entropy = &ps_codec->as_process[ctxt_sel * MAX_PROCESS_THREADS].s_entropy;
145 
146     /* Bitstream structure */
147     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
148 
149     /* sps */
150     sps_t *ps_sps = NULL;
151 
152     /* pps */
153     pps_t *ps_pps = NULL;
154 
155     /* output buff */
156     out_buf_t *ps_out_buf = &ps_codec->as_out_buf[ctxt_sel];
157 
158 
159     /********************************************************************/
160     /*      initialize the bit stream buffer                            */
161     /********************************************************************/
162     ih264e_bitstrm_init(ps_bitstrm, ps_out_buf->s_bits_buf.pv_buf, ps_out_buf->s_bits_buf.u4_bufsize);
163 
164     /********************************************************************/
165     /*                    BEGIN HEADER GENERATION                       */
166     /********************************************************************/
167     /*ps_codec->i4_pps_id ++;*/
168     ps_codec->i4_pps_id %= MAX_PPS_CNT;
169 
170     /*ps_codec->i4_sps_id ++;*/
171     ps_codec->i4_sps_id %= MAX_SPS_CNT;
172 
173     /* populate sps header */
174     ps_sps = ps_codec->ps_sps_base + ps_codec->i4_sps_id;
175     ih264e_populate_sps(ps_codec, ps_sps);
176 
177     /* populate pps header */
178     ps_pps = ps_codec->ps_pps_base + ps_codec->i4_pps_id;
179     ih264e_populate_pps(ps_codec, ps_pps);
180 
181     ps_entropy->i4_error_code = IH264E_SUCCESS;
182 
183     /* generate sps */
184     ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
185                                                      &ps_codec->s_cfg.s_vui);
186 
187     /* generate pps */
188     ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
189 
190     /* queue output buffer */
191     ps_out_buf->s_bits_buf.u4_bytes = ps_bitstrm->u4_strm_buf_offset;
192 
193     return ps_entropy->i4_error_code;
194 }
195 
196 /**
197 *******************************************************************************
198 *
199 * @brief   initialize entropy context.
200 *
201 * @par Description:
202 *  Before invoking the call to perform to entropy coding the entropy context
203 *  associated with the job needs to be initialized. This involves the start
204 *  mb address, end mb address, slice index and the pointer to location at
205 *  which the mb residue info and mb header info are packed.
206 *
207 * @param[in] ps_proc
208 *  Pointer to the current process context
209 *
210 * @returns error status
211 *
212 * @remarks none
213 *
214 *******************************************************************************
215 */
ih264e_init_entropy_ctxt(process_ctxt_t * ps_proc)216 IH264E_ERROR_T ih264e_init_entropy_ctxt(process_ctxt_t *ps_proc)
217 {
218     /* codec context */
219     codec_t *ps_codec = ps_proc->ps_codec;
220 
221     /* entropy ctxt */
222     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
223 
224     /* start address */
225     ps_entropy->i4_mb_start_add = ps_entropy->i4_mb_y * ps_entropy->i4_wd_mbs + ps_entropy->i4_mb_x;
226 
227     /* end address */
228     ps_entropy->i4_mb_end_add = ps_entropy->i4_mb_start_add + ps_entropy->i4_mb_cnt;
229 
230     /* slice index */
231     ps_entropy->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_entropy->i4_mb_start_add];
232 
233     /* sof */
234     /* @ start of frame or start of a new slice, set sof flag */
235     if (ps_entropy->i4_mb_start_add == 0)
236     {
237         ps_entropy->i4_sof = 1;
238     }
239 
240     if (ps_entropy->i4_mb_x == 0)
241     {
242         /* packed mb coeff data */
243         ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
244                         ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
245 
246         /* packed mb header data */
247         ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
248                         ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
249     }
250 
251     return IH264E_SUCCESS;
252 }
253 
254 /**
255 *******************************************************************************
256 *
257 * @brief entry point for entropy coding
258 *
259 * @par Description
260 *  This function calls lower level functions to perform entropy coding for a
261 *  group (n rows) of mb's. After encoding 1 row of mb's,  the function takes
262 *  back the control, updates the ctxt and calls lower level functions again.
263 *  This process is repeated till all the rows or group of mb's (which ever is
264 *  minimum) are coded
265 *
266 * @param[in] ps_proc
267 *  process context
268 *
269 * @returns  error status
270 *
271 * @remarks
272 *
273 *******************************************************************************
274 */
275 
ih264e_entropy(process_ctxt_t * ps_proc)276 IH264E_ERROR_T ih264e_entropy(process_ctxt_t *ps_proc)
277 {
278     /* codec context */
279     codec_t *ps_codec = ps_proc->ps_codec;
280 
281     /* entropy context */
282     entropy_ctxt_t *ps_entropy = &ps_proc->s_entropy;
283 
284     /* cabac context */
285     cabac_ctxt_t *ps_cabac_ctxt = ps_entropy->ps_cabac;
286 
287     /* sps */
288     sps_t *ps_sps = ps_entropy->ps_sps_base + (ps_entropy->u4_sps_id % MAX_SPS_CNT);
289 
290     /* pps */
291     pps_t *ps_pps = ps_entropy->ps_pps_base + (ps_entropy->u4_pps_id % MAX_PPS_CNT);
292 
293     /* slice header */
294     slice_header_t *ps_slice_hdr = ps_entropy->ps_slice_hdr_base + (ps_entropy->i4_cur_slice_idx % MAX_SLICE_HDR_CNT);
295 
296     /* slice type */
297     WORD32 i4_slice_type = ps_proc->i4_slice_type;
298 
299     /* Bitstream structure */
300     bitstrm_t *ps_bitstrm = ps_entropy->ps_bitstrm;
301 
302     /* output buff */
303     out_buf_t s_out_buf;
304 
305     /* proc map */
306     UWORD8  *pu1_proc_map;
307 
308     /* entropy map */
309     UWORD8  *pu1_entropy_map_curr;
310 
311     /* proc base idx */
312     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
313 
314     /* temp var */
315     WORD32 i4_wd_mbs, i4_ht_mbs;
316     UWORD32 u4_mb_cnt, u4_mb_idx, u4_mb_end_idx;
317     WORD32 bitstream_start_offset, bitstream_end_offset;
318     /********************************************************************/
319     /*                            BEGIN INIT                            */
320     /********************************************************************/
321 
322     /* entropy encode start address */
323     u4_mb_idx = ps_entropy->i4_mb_start_add;
324 
325     /* entropy encode end address */
326     u4_mb_end_idx = ps_entropy->i4_mb_end_add;
327 
328     /* width in mbs */
329     i4_wd_mbs = ps_entropy->i4_wd_mbs;
330 
331     /* height in mbs */
332     i4_ht_mbs = ps_entropy->i4_ht_mbs;
333 
334     /* total mb cnt */
335     u4_mb_cnt = i4_wd_mbs * i4_ht_mbs;
336 
337     /* proc map */
338     pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
339 
340     /* entropy map */
341     pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
342 
343     /********************************************************************/
344     /* @ start of frame / slice,                                        */
345     /*      initialize the output buffer,                               */
346     /*      initialize the bit stream buffer,                           */
347     /*      check if sps and pps headers have to be generated,          */
348     /*      populate and generate slice header                          */
349     /********************************************************************/
350     if (ps_entropy->i4_sof)
351     {
352         /********************************************************************/
353         /*      initialize the output buffer                                */
354         /********************************************************************/
355         s_out_buf = ps_codec->as_out_buf[ctxt_sel];
356 
357         /* is last frame to encode */
358         s_out_buf.u4_is_last = ps_entropy->u4_is_last;
359 
360         /* frame idx */
361         s_out_buf.u4_timestamp_high = ps_entropy->u4_timestamp_high;
362         s_out_buf.u4_timestamp_low = ps_entropy->u4_timestamp_low;
363 
364         /********************************************************************/
365         /*      initialize the bit stream buffer                            */
366         /********************************************************************/
367         ih264e_bitstrm_init(ps_bitstrm, s_out_buf.s_bits_buf.pv_buf, s_out_buf.s_bits_buf.u4_bufsize);
368 
369         /********************************************************************/
370         /*                    BEGIN HEADER GENERATION                       */
371         /********************************************************************/
372         if (1 == ps_entropy->i4_gen_header)
373         {
374             /* generate sps */
375             ps_entropy->i4_error_code |= ih264e_generate_sps(ps_bitstrm, ps_sps,
376                                                              &ps_codec->s_cfg.s_vui);
377             /* generate pps */
378             ps_entropy->i4_error_code |= ih264e_generate_pps(ps_bitstrm, ps_pps, ps_sps);
379 
380             /* reset i4_gen_header */
381             ps_entropy->i4_gen_header = 0;
382         }
383 
384         /* populate slice header */
385         ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps, ps_sps);
386 
387         /* generate slice header */
388         ps_entropy->i4_error_code |= ih264e_generate_slice_header(ps_bitstrm, ps_slice_hdr,
389                                                                   ps_pps, ps_sps);
390 
391         /* once start of frame / slice is done, you can reset it */
392         /* it is the responsibility of the caller to set this flag */
393         ps_entropy->i4_sof = 0;
394 
395         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
396         {
397             BITSTREAM_BYTE_ALIGN(ps_bitstrm);
398             BITSTREAM_FLUSH(ps_bitstrm);
399             ih264e_init_cabac_ctxt(ps_entropy);
400         }
401     }
402 
403     /* begin entropy coding for the mb set */
404     while (u4_mb_idx < u4_mb_end_idx)
405     {
406         /* init ptrs/indices */
407         if (ps_entropy->i4_mb_x == i4_wd_mbs)
408         {
409             ps_entropy->i4_mb_y++;
410             ps_entropy->i4_mb_x = 0;
411 
412             /* packed mb coeff data */
413             ps_entropy->pv_mb_coeff_data = ((UWORD8 *)ps_entropy->pv_pic_mb_coeff_data) +
414                             ps_entropy->i4_mb_y * ps_codec->u4_size_coeff_data;
415 
416             /* packed mb header data */
417             ps_entropy->pv_mb_header_data = ((UWORD8 *)ps_entropy->pv_pic_mb_header_data) +
418                             ps_entropy->i4_mb_y * ps_codec->u4_size_header_data;
419 
420             /* proc map */
421             pu1_proc_map = ps_proc->pu1_proc_map + ps_entropy->i4_mb_y * i4_wd_mbs;
422 
423             /* entropy map */
424             pu1_entropy_map_curr = ps_entropy->pu1_entropy_map + ps_entropy->i4_mb_y * i4_wd_mbs;
425         }
426 
427         DEBUG("\nmb indices x, y %d, %d", ps_entropy->i4_mb_x, ps_entropy->i4_mb_y);
428         ENTROPY_TRACE("mb index x %d", ps_entropy->i4_mb_x);
429         ENTROPY_TRACE("mb index y %d", ps_entropy->i4_mb_y);
430 
431         /* wait until the curr mb is core coded */
432         /* The wait for curr mb to be core coded is essential when entropy is launched
433          * as a separate job
434          */
435         while (1)
436         {
437             volatile UWORD8 *pu1_buf1;
438             WORD32 idx = ps_entropy->i4_mb_x;
439 
440             pu1_buf1 = pu1_proc_map + idx;
441             if (*pu1_buf1)
442                 break;
443             ithread_yield();
444         }
445 
446 
447         /* write mb layer */
448         ps_entropy->i4_error_code |= ps_codec->pf_write_mb_syntax_layer[ps_entropy->u1_entropy_coding_mode_flag][i4_slice_type](ps_entropy);
449         /* Starting bitstream offset for header in bits */
450         bitstream_start_offset = GET_NUM_BITS(ps_bitstrm);
451 
452         /* set entropy map */
453         pu1_entropy_map_curr[ps_entropy->i4_mb_x] = 1;
454 
455         u4_mb_idx++;
456         ps_entropy->i4_mb_x++;
457         /* check for eof */
458         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
459         {
460             if (ps_entropy->i4_mb_x < i4_wd_mbs)
461             {
462                 ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
463             }
464         }
465 
466         if (ps_entropy->i4_mb_x == i4_wd_mbs)
467         {
468             /* if slices are enabled */
469             if (ps_codec->s_cfg.e_slice_mode == IVE_SLICE_MODE_BLOCKS)
470             {
471                 /* current slice index */
472                 WORD32 i4_curr_slice_idx = ps_entropy->i4_cur_slice_idx;
473 
474                 /* slice map */
475                 UWORD8 *pu1_slice_idx = ps_entropy->pu1_slice_idx;
476 
477                 /* No need to open a slice at end of frame. The current slice can be closed at the time
478                  * of signaling eof flag.
479                  */
480                 if ((u4_mb_idx != u4_mb_cnt) && (i4_curr_slice_idx
481                                                 != pu1_slice_idx[u4_mb_idx]))
482                 {
483                     if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
484                     { /* mb skip run */
485                         if ((i4_slice_type != ISLICE)
486                                         && *ps_entropy->pi4_mb_skip_run)
487                         {
488                             if (*ps_entropy->pi4_mb_skip_run)
489                             {
490                             PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run, ps_entropy->i4_error_code, "mb skip run");
491                                 *ps_entropy->pi4_mb_skip_run = 0;
492                             }
493                         }
494                         /* put rbsp trailing bits for the previous slice */
495                                  ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
496                     }
497                     else
498                     {
499                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
500                     }
501 
502                     /* update slice header pointer */
503                     i4_curr_slice_idx = pu1_slice_idx[u4_mb_idx];
504                     ps_entropy->i4_cur_slice_idx = i4_curr_slice_idx;
505                     ps_slice_hdr = ps_entropy->ps_slice_hdr_base+ (i4_curr_slice_idx % MAX_SLICE_HDR_CNT);
506 
507                     /* populate slice header */
508                     ps_entropy->i4_mb_start_add = u4_mb_idx;
509                     ih264e_populate_slice_header(ps_proc, ps_slice_hdr, ps_pps,
510                                                  ps_sps);
511 
512                     /* generate slice header */
513                     ps_entropy->i4_error_code |= ih264e_generate_slice_header(
514                                     ps_bitstrm, ps_slice_hdr, ps_pps, ps_sps);
515                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag)
516                     {
517                         BITSTREAM_BYTE_ALIGN(ps_bitstrm);
518                         BITSTREAM_FLUSH(ps_bitstrm);
519                         ih264e_init_cabac_ctxt(ps_entropy);
520                     }
521                 }
522                 else
523                 {
524                     if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
525                                     && u4_mb_idx != u4_mb_cnt)
526                     {
527                         ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
528                     }
529                 }
530             }
531             /* Dont execute any further instructions until store synchronization took place */
532             DATA_SYNC();
533         }
534 
535         /* Ending bitstream offset for header in bits */
536         bitstream_end_offset = GET_NUM_BITS(ps_bitstrm);
537         ps_entropy->u4_header_bits[i4_slice_type == PSLICE] +=
538                         bitstream_end_offset - bitstream_start_offset;
539     }
540 
541     /* check for eof */
542     if (u4_mb_idx == u4_mb_cnt)
543     {
544         /* set end of frame flag */
545         ps_entropy->i4_eof = 1;
546     }
547     else
548     {
549         if (CABAC == ps_entropy->u1_entropy_coding_mode_flag
550                         && ps_codec->s_cfg.e_slice_mode
551                                         != IVE_SLICE_MODE_BLOCKS)
552         {
553             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 0);
554         }
555     }
556 
557     if (ps_entropy->i4_eof)
558     {
559         if (CAVLC == ps_entropy->u1_entropy_coding_mode_flag)
560         {
561             /* mb skip run */
562             if ((i4_slice_type != ISLICE) && *ps_entropy->pi4_mb_skip_run)
563             {
564                 if (*ps_entropy->pi4_mb_skip_run)
565                 {
566                     PUT_BITS_UEV(ps_bitstrm, *ps_entropy->pi4_mb_skip_run,
567                                  ps_entropy->i4_error_code, "mb skip run");
568                     *ps_entropy->pi4_mb_skip_run = 0;
569                 }
570             }
571             /* put rbsp trailing bits */
572              ps_entropy->i4_error_code |= ih264e_put_rbsp_trailing_bits(ps_bitstrm);
573         }
574         else
575         {
576             ih264e_cabac_encode_terminate(ps_cabac_ctxt, 1);
577         }
578 
579         /* update current frame stats to rc library */
580         {
581             /* number of bytes to stuff */
582             WORD32 i4_stuff_bytes;
583 
584             /* update */
585             i4_stuff_bytes = ih264e_update_rc_post_enc(
586                             ps_codec, ctxt_sel,
587                             (ps_proc->ps_codec->i4_poc == 0));
588 
589             /* cbr rc - house keeping */
590             if (ps_codec->s_rate_control.post_encode_skip[ctxt_sel])
591             {
592                 ps_entropy->ps_bitstrm->u4_strm_buf_offset = 0;
593             }
594             else if (i4_stuff_bytes)
595             {
596                 /* add filler nal units */
597                 ps_entropy->i4_error_code |= ih264e_add_filler_nal_unit(ps_bitstrm, i4_stuff_bytes);
598             }
599         }
600 
601         /*
602          *Frame number is to be incremented only if the current frame is a
603          * reference frame. After each successful frame encode, we increment
604          * frame number by 1
605          */
606         if (!ps_codec->s_rate_control.post_encode_skip[ctxt_sel]
607                         && ps_codec->u4_is_curr_frm_ref)
608         {
609             ps_codec->i4_frame_num++;
610         }
611         /********************************************************************/
612         /*      signal the output                                           */
613         /********************************************************************/
614         ps_codec->as_out_buf[ctxt_sel].s_bits_buf.u4_bytes =
615                         ps_entropy->ps_bitstrm->u4_strm_buf_offset;
616 
617         DEBUG("entropy status %x", ps_entropy->i4_error_code);
618     }
619 
620     /* allow threads to dequeue entropy jobs */
621     ps_codec->au4_entropy_thread_active[ctxt_sel] = 0;
622 
623     return ps_entropy->i4_error_code;
624 }
625 
626 /**
627 *******************************************************************************
628 *
629 * @brief Packs header information of a mb in to a buffer
630 *
631 * @par Description:
632 *  After the deciding the mode info of a macroblock, the syntax elements
633 *  associated with the mb are packed and stored. The entropy thread unpacks
634 *  this buffer and generates the end bit stream.
635 *
636 * @param[in] ps_proc
637 *  Pointer to the current process context
638 *
639 * @returns error status
640 *
641 * @remarks none
642 *
643 *******************************************************************************
644 */
ih264e_pack_header_data(process_ctxt_t * ps_proc)645 IH264E_ERROR_T ih264e_pack_header_data(process_ctxt_t *ps_proc)
646 {
647     /* curr mb type */
648     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
649 
650     /* pack mb syntax layer of curr mb (used for entropy coding) */
651     if (u4_mb_type == I4x4)
652     {
653         /* pointer to mb header storage space */
654         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
655         mb_hdr_i4x4_t *ps_mb_hdr = (mb_hdr_i4x4_t *)ps_proc->pv_mb_header_data;
656 
657         /* temp var */
658         WORD32 i4, byte;
659 
660         /* mb type plus mode */
661         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + u4_mb_type;
662 
663         /* cbp */
664         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
665 
666         /* mb qp delta */
667         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
668 
669         /* sub mb modes */
670         for (i4 = 0; i4 < 16; i4 ++)
671         {
672             byte = 0;
673 
674             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
675                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
676             {
677                 byte |= 1;
678             }
679             else
680             {
681 
682                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
683                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
684                 {
685                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 1);
686                 }
687                 else
688                 {
689                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 1;
690                 }
691             }
692 
693             i4++;
694 
695             if (ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4] ==
696                             ps_proc->au1_intra_luma_mb_4x4_modes[i4])
697             {
698                 byte |= 16;
699             }
700             else
701             {
702 
703                 if (ps_proc->au1_intra_luma_mb_4x4_modes[i4] <
704                                 ps_proc->au1_predicted_intra_luma_mb_4x4_modes[i4])
705                 {
706                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] << 5);
707                 }
708                 else
709                 {
710                     byte |= (ps_proc->au1_intra_luma_mb_4x4_modes[i4] - 1) << 5;
711                 }
712             }
713 
714             ps_mb_hdr->au1_sub_blk_modes[i4 >> 1] =  byte;
715         }
716 
717         /* end of mb layer */
718         pu1_ptr += sizeof(mb_hdr_i4x4_t);
719         ps_proc->pv_mb_header_data = pu1_ptr;
720     }
721     else if (u4_mb_type == I16x16)
722     {
723         /* pointer to mb header storage space */
724         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
725         mb_hdr_i16x16_t *ps_mb_hdr = (mb_hdr_i16x16_t *)ps_proc->pv_mb_header_data;
726 
727         /* mb type plus mode */
728         ps_mb_hdr->common.u1_mb_type_mode = (ps_proc->u1_c_i8_mode << 6) + (ps_proc->u1_l_i16_mode << 4) + u4_mb_type;
729 
730         /* cbp */
731         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
732 
733         /* mb qp delta */
734         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
735 
736         /* end of mb layer */
737         pu1_ptr += sizeof(mb_hdr_i16x16_t);
738         ps_proc->pv_mb_header_data = pu1_ptr;
739     }
740     else if (u4_mb_type == P16x16)
741     {
742         /* pointer to mb header storage space */
743         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
744         mb_hdr_p16x16_t *ps_mb_hdr = (mb_hdr_p16x16_t *)ps_proc->pv_mb_header_data;
745 
746         /* mb type */
747         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
748 
749         /* cbp */
750         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
751 
752         /* mb qp delta */
753         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
754 
755         ps_mb_hdr->ai2_mv[0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
756 
757         ps_mb_hdr->ai2_mv[1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
758 
759         /* end of mb layer */
760         pu1_ptr += sizeof(mb_hdr_p16x16_t);
761         ps_proc->pv_mb_header_data = pu1_ptr;
762     }
763     else if (u4_mb_type == PSKIP)
764     {
765         /* pointer to mb header storage space */
766         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
767         mb_hdr_pskip_t *ps_mb_hdr = (mb_hdr_pskip_t *)ps_proc->pv_mb_header_data;
768 
769         /* mb type */
770         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
771 
772         /* end of mb layer */
773         pu1_ptr += sizeof(mb_hdr_pskip_t);
774         ps_proc->pv_mb_header_data = pu1_ptr;
775     }
776     else if(u4_mb_type == B16x16)
777     {
778 
779         /* pointer to mb header storage space */
780         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
781         mb_hdr_b16x16_t *ps_mb_hdr = (mb_hdr_b16x16_t *)ps_proc->pv_mb_header_data;
782 
783         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
784 
785         /* mb type plus mode */
786         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
787 
788         /* cbp */
789         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
790 
791         /* mb qp delta */
792         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
793 
794         /* l0 & l1 me data */
795         if (u4_pred_mode != PRED_L1)
796         {
797             ps_mb_hdr->ai2_mv[0][0] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvx
798                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvx;
799 
800             ps_mb_hdr->ai2_mv[0][1] = ps_proc->ps_pu->s_me_info[0].s_mv.i2_mvy
801                             - ps_proc->ps_pred_mv[0].s_mv.i2_mvy;
802         }
803         if (u4_pred_mode != PRED_L0)
804         {
805             ps_mb_hdr->ai2_mv[1][0] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvx
806                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvx;
807 
808             ps_mb_hdr->ai2_mv[1][1] = ps_proc->ps_pu->s_me_info[1].s_mv.i2_mvy
809                             - ps_proc->ps_pred_mv[1].s_mv.i2_mvy;
810         }
811 
812         /* end of mb layer */
813         pu1_ptr += sizeof(mb_hdr_b16x16_t);
814         ps_proc->pv_mb_header_data = pu1_ptr;
815 
816     }
817     else if(u4_mb_type == BDIRECT)
818     {
819         /* pointer to mb header storage space */
820         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
821         mb_hdr_bdirect_t *ps_mb_hdr = (mb_hdr_bdirect_t *)ps_proc->pv_mb_header_data;
822 
823         /* mb type plus mode */
824         ps_mb_hdr->common.u1_mb_type_mode = u4_mb_type;
825 
826         /* cbp */
827         ps_mb_hdr->common.u1_cbp = ps_proc->u4_cbp;
828 
829         /* mb qp delta */
830         ps_mb_hdr->common.u1_mb_qp_delta = ps_proc->u4_mb_qp - ps_proc->u4_mb_qp_prev;
831 
832         /* end of mb layer */
833         pu1_ptr += sizeof(mb_hdr_bdirect_t);
834         ps_proc->pv_mb_header_data = pu1_ptr;
835 
836     }
837     else if(u4_mb_type == BSKIP)
838     {
839         UWORD32 u4_pred_mode = ps_proc->ps_pu->b2_pred_mode;
840 
841         /* pointer to mb header storage space */
842         UWORD8 *pu1_ptr = ps_proc->pv_mb_header_data;
843         mb_hdr_bskip_t *ps_mb_hdr = (mb_hdr_bskip_t *)ps_proc->pv_mb_header_data;
844 
845         /* mb type plus mode */
846         ps_mb_hdr->common.u1_mb_type_mode = (u4_pred_mode << 4) + u4_mb_type;
847 
848         /* end of mb layer */
849         pu1_ptr += sizeof(mb_hdr_bskip_t);
850         ps_proc->pv_mb_header_data = pu1_ptr;
851     }
852 
853     return IH264E_SUCCESS;
854 }
855 
856 /**
857 *******************************************************************************
858 *
859 * @brief   update process context after encoding an mb. This involves preserving
860 * the current mb information for later use, initialize the proc ctxt elements to
861 * encode next mb.
862 *
863 * @par Description:
864 *  This function performs house keeping tasks after encoding an mb.
865 *  After encoding an mb, various elements of the process context needs to be
866 *  updated to encode the next mb. For instance, the source, recon and reference
867 *  pointers, mb indices have to be adjusted to the next mb. The slice index of
868 *  the current mb needs to be updated. If mb qp modulation is enabled, then if
869 *  the qp changes the quant param structure needs to be updated. Also to encoding
870 *  the next mb, the current mb info is used as part of mode prediction or mv
871 *  prediction. Hence the current mb info has to preserved at top/top left/left
872 *  locations.
873 *
874 * @param[in] ps_proc
875 *  Pointer to the current process context
876 *
877 * @returns none
878 *
879 * @remarks none
880 *
881 *******************************************************************************
882 */
ih264e_update_proc_ctxt(process_ctxt_t * ps_proc)883 WORD32 ih264e_update_proc_ctxt(process_ctxt_t *ps_proc)
884 {
885     /* error status */
886     WORD32 error_status = IH264_SUCCESS;
887 
888     /* codec context */
889     codec_t *ps_codec = ps_proc->ps_codec;
890 
891     /* curr mb indices */
892     WORD32 i4_mb_x = ps_proc->i4_mb_x;
893     WORD32 i4_mb_y = ps_proc->i4_mb_y;
894 
895     /* mb syntax elements of neighbors */
896     mb_info_t *ps_left_syn =  &ps_proc->s_left_mb_syntax_ele;
897     mb_info_t *ps_top_syn = ps_proc->ps_top_row_mb_syntax_ele + i4_mb_x;
898     mb_info_t *ps_top_left_syn = &ps_proc->s_top_left_mb_syntax_ele;
899 
900     /* curr mb type */
901     UWORD32 u4_mb_type = ps_proc->u4_mb_type;
902 
903     /* curr mb type */
904     UWORD32 u4_is_intra = ps_proc->u4_is_intra;
905 
906     /* width in mbs */
907     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
908 
909     /*height in mbs*/
910     WORD32 i4_ht_mbs = ps_proc->i4_ht_mbs;
911 
912     /* proc map */
913     UWORD8 *pu1_proc_map = ps_proc->pu1_proc_map + (i4_mb_y * i4_wd_mbs);
914 
915     /* deblk context */
916     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
917 
918     /* deblk bs context */
919     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
920 
921     /* top row motion vector info */
922     enc_pu_t *ps_top_row_pu = ps_proc->ps_top_row_pu + i4_mb_x;
923 
924     /* top left mb motion vector */
925     enc_pu_t *ps_top_left_mb_pu = &ps_proc->s_top_left_mb_pu;
926 
927     /* left mb motion vector */
928     enc_pu_t *ps_left_mb_pu = &ps_proc->s_left_mb_pu;
929 
930     /* sub mb modes */
931     UWORD8 *pu1_top_mb_intra_modes = ps_proc->pu1_top_mb_intra_modes + (i4_mb_x << 4);
932 
933     /*************************************************************/
934     /* During MV prediction, when top right mb is not available, */
935     /* top left mb info. is used for prediction. Hence the curr  */
936     /* top, which will be top left for the next mb needs to be   */
937     /* preserved before updating it with curr mb info.           */
938     /*************************************************************/
939 
940     /* mb type, mb class, csbp */
941     *ps_top_left_syn = *ps_top_syn;
942 
943     if (ps_proc->i4_slice_type != ISLICE)
944     {
945         /*****************************************/
946         /* update top left with top info results */
947         /*****************************************/
948         /* mv */
949         *ps_top_left_mb_pu = *ps_top_row_pu;
950     }
951 
952     /*************************************************/
953     /* update top and left with curr mb info results */
954     /*************************************************/
955 
956     /* mb type */
957     ps_left_syn->u2_mb_type = ps_top_syn->u2_mb_type = u4_mb_type;
958 
959     /* mb class */
960     ps_left_syn->u2_is_intra = ps_top_syn->u2_is_intra = u4_is_intra;
961 
962     /* csbp */
963     ps_left_syn->u4_csbp = ps_top_syn->u4_csbp = ps_proc->u4_csbp;
964 
965     /* distortion */
966     ps_left_syn->i4_mb_distortion = ps_top_syn->i4_mb_distortion = ps_proc->i4_mb_distortion;
967 
968     if (u4_is_intra)
969     {
970         /* mb / sub mb modes */
971         if (I16x16 == u4_mb_type)
972         {
973             pu1_top_mb_intra_modes[0] = ps_proc->au1_left_mb_intra_modes[0] = ps_proc->u1_l_i16_mode;
974         }
975         else if (I4x4 == u4_mb_type)
976         {
977             ps_codec->pf_mem_cpy_mul8(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
978             ps_codec->pf_mem_cpy_mul8(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_4x4_modes, 16);
979         }
980         else if (I8x8 == u4_mb_type)
981         {
982             memcpy(ps_proc->au1_left_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
983             memcpy(pu1_top_mb_intra_modes, ps_proc->au1_intra_luma_mb_8x8_modes, 4);
984         }
985 
986         if ((ps_proc->i4_slice_type == PSLICE) ||(ps_proc->i4_slice_type == BSLICE))
987         {
988             /* mv */
989             *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
990         }
991 
992         *ps_proc->pu4_mb_pu_cnt = 1;
993     }
994     else
995     {
996         /* mv */
997         *ps_left_mb_pu = *ps_top_row_pu = *(ps_proc->ps_pu);
998     }
999 
1000     /*
1001      * Mark that the MB has been coded intra
1002      * So that future AIRs can skip it
1003      */
1004     ps_proc->pu1_is_intra_coded[i4_mb_x + (i4_mb_y * i4_wd_mbs)] = u4_is_intra;
1005 
1006     /**************************************************/
1007     /* pack mb header info. for entropy coding        */
1008     /**************************************************/
1009     ih264e_pack_header_data(ps_proc);
1010 
1011     /* update previous mb qp */
1012     ps_proc->u4_mb_qp_prev = ps_proc->u4_mb_qp;
1013 
1014     /* store qp */
1015     ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1016 
1017     /*
1018      * We need to sync the cache to make sure that the nmv content of proc
1019      * is updated to cache properly
1020      */
1021     DATA_SYNC();
1022 
1023     /* Just before finishing the row, enqueue the job in to entropy queue.
1024      * The master thread depending on its convenience shall dequeue it and
1025      * performs entropy.
1026      *
1027      * WARN !! Placing this block post proc map update can cause queuing of
1028      * entropy jobs in out of order.
1029      */
1030     if (i4_mb_x == i4_wd_mbs - 1)
1031     {
1032         /* job structures */
1033         job_t s_job;
1034 
1035         /* job class */
1036         s_job.i4_cmd = CMD_ENTROPY;
1037 
1038         /* number of mbs to be processed in the current job */
1039         s_job.i2_mb_cnt = ps_codec->s_cfg.i4_wd_mbs;
1040 
1041         /* job start index x */
1042         s_job.i2_mb_x = 0;
1043 
1044         /* job start index y */
1045         s_job.i2_mb_y = ps_proc->i4_mb_y;
1046 
1047         /* proc base idx */
1048         s_job.i2_proc_base_idx = (ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS) ? (MAX_PROCESS_CTXT / 2) : 0;
1049 
1050         /* queue the job */
1051         error_status |= ih264_list_queue(ps_proc->pv_entropy_jobq, &s_job, 1);
1052 
1053         if(ps_proc->i4_mb_y == (i4_ht_mbs - 1))
1054             ih264_list_terminate(ps_codec->pv_entropy_jobq);
1055     }
1056 
1057     /* update proc map */
1058     pu1_proc_map[i4_mb_x] = 1;
1059 
1060     /**************************************************/
1061     /* update proc ctxt elements for encoding next mb */
1062     /**************************************************/
1063     /* update indices */
1064     i4_mb_x ++;
1065     ps_proc->i4_mb_x = i4_mb_x;
1066 
1067     if (ps_proc->i4_mb_x == i4_wd_mbs)
1068     {
1069         ps_proc->i4_mb_y++;
1070         ps_proc->i4_mb_x = 0;
1071     }
1072 
1073     /* update slice index */
1074     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[ps_proc->i4_mb_y * i4_wd_mbs + ps_proc->i4_mb_x];
1075 
1076     /* update buffers pointers */
1077     ps_proc->pu1_src_buf_luma += MB_SIZE;
1078     ps_proc->pu1_rec_buf_luma += MB_SIZE;
1079     ps_proc->apu1_ref_buf_luma[0] += MB_SIZE;
1080     ps_proc->apu1_ref_buf_luma[1] += MB_SIZE;
1081 
1082     /*
1083      * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1084      * the stride per MB is MB_SIZE
1085      */
1086     ps_proc->pu1_src_buf_chroma += MB_SIZE;
1087     ps_proc->pu1_rec_buf_chroma += MB_SIZE;
1088     ps_proc->apu1_ref_buf_chroma[0] += MB_SIZE;
1089     ps_proc->apu1_ref_buf_chroma[1] += MB_SIZE;
1090 
1091 
1092 
1093     /* Reset cost, distortion params */
1094     ps_proc->i4_mb_cost = INT_MAX;
1095     ps_proc->i4_mb_distortion = SHRT_MAX;
1096 
1097     ps_proc->ps_pu += *ps_proc->pu4_mb_pu_cnt;
1098 
1099     ps_proc->pu4_mb_pu_cnt += 1;
1100 
1101     /* Update colocated pu */
1102     if (ps_proc->i4_slice_type == BSLICE)
1103         ps_proc->ps_colpu += *(ps_proc->aps_mv_buf[1]->pu4_mb_pu_cnt +  (i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x);
1104 
1105     /* deblk ctxts */
1106     if (ps_proc->u4_disable_deblock_level != 1)
1107     {
1108         /* indices */
1109         ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1110         ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1111 
1112 #ifndef N_MB_ENABLE /* For N MB processing update take place inside deblocking function */
1113         ps_deblk->i4_mb_x ++;
1114 
1115         ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1116         /*
1117          * Note: Although chroma mb size is 8, as the chroma buffers are interleaved,
1118          * the stride per MB is MB_SIZE
1119          */
1120         ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1121 #endif
1122     }
1123 
1124     return error_status;
1125 }
1126 
1127 /**
1128 *******************************************************************************
1129 *
1130 * @brief   initialize process context.
1131 *
1132 * @par Description:
1133 *  Before dispatching the current job to process thread, the process context
1134 *  associated with the job is initialized. Usually every job aims to encode one
1135 *  row of mb's. Basing on the row indices provided by the job, the process
1136 *  context's buffer ptrs, slice indices and other elements that are necessary
1137 *  during core-coding are initialized.
1138 *
1139 * @param[in] ps_proc
1140 *  Pointer to the current process context
1141 *
1142 * @returns error status
1143 *
1144 * @remarks none
1145 *
1146 *******************************************************************************
1147 */
ih264e_init_proc_ctxt(process_ctxt_t * ps_proc)1148 IH264E_ERROR_T ih264e_init_proc_ctxt(process_ctxt_t *ps_proc)
1149 {
1150     /* codec context */
1151     codec_t *ps_codec = ps_proc->ps_codec;
1152 
1153     /* nmb processing context*/
1154     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1155 
1156     /* indices */
1157     WORD32 i4_mb_x, i4_mb_y;
1158 
1159     /* strides */
1160     WORD32 i4_src_strd = ps_proc->i4_src_strd;
1161     WORD32 i4_src_chroma_strd = ps_proc->i4_src_chroma_strd;
1162     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1163 
1164     /* quant params */
1165     quant_params_t *ps_qp_params = ps_proc->ps_qp_params[0];
1166 
1167     /* deblk ctxt */
1168     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1169 
1170     /* deblk bs context */
1171     bs_ctxt_t *ps_bs = &(ps_deblk->s_bs_ctxt);
1172 
1173     /* Pointer to mv_buffer of current frame */
1174     mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
1175 
1176     /* Pointers for color space conversion */
1177     UWORD8 *pu1_y_buf_base, *pu1_u_buf_base, *pu1_v_buf_base;
1178 
1179     /* Pad the MB to support non standard sizes */
1180     UWORD32 u4_pad_right_sz = ps_codec->s_cfg.u4_wd - ps_codec->s_cfg.u4_disp_wd;
1181     UWORD32 u4_pad_bottom_sz = ps_codec->s_cfg.u4_ht - ps_codec->s_cfg.u4_disp_ht;
1182     UWORD16 u2_num_rows = MB_SIZE;
1183     WORD32 convert_uv_only;
1184 
1185     /********************************************************************/
1186     /*                            BEGIN INIT                            */
1187     /********************************************************************/
1188 
1189     i4_mb_x = ps_proc->i4_mb_x;
1190     i4_mb_y = ps_proc->i4_mb_y;
1191 
1192     /* Number of mbs processed in one loop of process function */
1193     ps_proc->i4_nmb_ntrpy = ps_proc->i4_wd_mbs;
1194     ps_proc->u4_nmb_me = ps_proc->i4_wd_mbs;
1195 
1196     /* init buffer pointers */
1197     convert_uv_only = 1;
1198     if (u4_pad_bottom_sz || u4_pad_right_sz ||
1199         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE)
1200     {
1201         if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1202             u2_num_rows = (UWORD16) MB_SIZE - u4_pad_bottom_sz;
1203         ps_proc->pu1_src_buf_luma_base = ps_codec->pu1_y_csc_buf_base;
1204         i4_src_strd = ps_proc->i4_src_strd = ps_codec->s_cfg.u4_max_wd;
1205         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * MB_SIZE);
1206         convert_uv_only = 0;
1207     }
1208     else
1209     {
1210         i4_src_strd = ps_proc->i4_src_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1211         ps_proc->pu1_src_buf_luma = ps_proc->pu1_src_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_src_strd * (i4_mb_y * MB_SIZE);
1212     }
1213 
1214 
1215     if (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_422ILE ||
1216         ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420P ||
1217         ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) ||
1218         u4_pad_bottom_sz || u4_pad_right_sz)
1219     {
1220         if ((ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_UV) ||
1221             (ps_codec->s_cfg.e_inp_color_fmt == IV_YUV_420SP_VU))
1222             ps_proc->pu1_src_buf_chroma_base = ps_codec->pu1_uv_csc_buf_base;
1223 
1224         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + ps_codec->s_cfg.u4_max_wd * (i4_mb_y * BLK8x8SIZE);
1225         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_codec->s_cfg.u4_max_wd;
1226     }
1227     else
1228     {
1229         i4_src_chroma_strd = ps_proc->i4_src_chroma_strd = ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1230         ps_proc->pu1_src_buf_chroma = ps_proc->pu1_src_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_src_chroma_strd * (i4_mb_y * BLK8x8SIZE);
1231     }
1232 
1233     ps_proc->pu1_rec_buf_luma = ps_proc->pu1_rec_buf_luma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1234     ps_proc->pu1_rec_buf_chroma = ps_proc->pu1_rec_buf_chroma_base + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1235 
1236     /* Tempral back and forward reference buffer */
1237     ps_proc->apu1_ref_buf_luma[0] = ps_proc->apu1_ref_buf_luma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1238     ps_proc->apu1_ref_buf_chroma[0] = ps_proc->apu1_ref_buf_chroma_base[0] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1239     ps_proc->apu1_ref_buf_luma[1] = ps_proc->apu1_ref_buf_luma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * MB_SIZE);
1240     ps_proc->apu1_ref_buf_chroma[1] = ps_proc->apu1_ref_buf_chroma_base[1] + (i4_mb_x * MB_SIZE) + i4_rec_strd * (i4_mb_y * BLK8x8SIZE);
1241 
1242     /*
1243      * Do color space conversion
1244      * NOTE : We assume there that the number of MB's to process will not span multiple rows
1245      */
1246     switch (ps_codec->s_cfg.e_inp_color_fmt)
1247     {
1248         case IV_YUV_420SP_UV:
1249         case IV_YUV_420SP_VU:
1250             /* In case of 420 semi-planar input, copy last few rows to intermediate
1251                buffer as chroma trans functions access one extra byte due to interleaved input.
1252                This data will be padded if required */
1253             if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1) || u4_pad_bottom_sz || u4_pad_right_sz)
1254             {
1255                 WORD32 num_rows = MB_SIZE;
1256                 UWORD8 *pu1_src;
1257                 UWORD8 *pu1_dst;
1258                 WORD32 i;
1259                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1260                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1261 
1262                 pu1_dst = ps_proc->pu1_src_buf_luma;
1263 
1264                 /* If padding is required, we always copy luma, if padding isn't required we never copy luma. */
1265                 if (u4_pad_bottom_sz || u4_pad_right_sz) {
1266                     if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1267                         num_rows = MB_SIZE - u4_pad_bottom_sz;
1268                     for (i = 0; i < num_rows; i++)
1269                     {
1270                         memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1271                         pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[0];
1272                         pu1_dst += ps_proc->i4_src_strd;
1273                     }
1274                 }
1275                 pu1_src = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1276                           ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1277                 pu1_dst = ps_proc->pu1_src_buf_chroma;
1278 
1279                 /* Last MB row of chroma is copied unconditionally, since trans functions access an extra byte
1280                  * due to interleaved input
1281                  */
1282                 if (ps_proc->i4_mb_y == (ps_proc->i4_ht_mbs - 1))
1283                     num_rows = (ps_codec->s_cfg.u4_disp_ht >> 1) - (ps_proc->i4_mb_y * BLK8x8SIZE);
1284                 else
1285                     num_rows = BLK8x8SIZE;
1286                 for (i = 0; i < num_rows; i++)
1287                 {
1288                     memcpy(pu1_dst, pu1_src, ps_codec->s_cfg.u4_wd);
1289                     pu1_src += ps_proc->s_inp_buf.s_raw_buf.au4_strd[1];
1290                     pu1_dst += ps_proc->i4_src_chroma_strd;
1291                 }
1292 
1293             }
1294             break;
1295 
1296         case IV_YUV_420P :
1297             pu1_y_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE) +
1298                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1299 
1300             pu1_u_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[1] + (i4_mb_x * BLK8x8SIZE) +
1301                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1] * (i4_mb_y * BLK8x8SIZE);
1302 
1303             pu1_v_buf_base = (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[2] + (i4_mb_x * BLK8x8SIZE) +
1304                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2] * (i4_mb_y * BLK8x8SIZE);
1305 
1306             ps_codec->pf_ih264e_conv_420p_to_420sp(
1307                             pu1_y_buf_base, pu1_u_buf_base, pu1_v_buf_base,
1308                             ps_proc->pu1_src_buf_luma,
1309                             ps_proc->pu1_src_buf_chroma, u2_num_rows,
1310                             ps_codec->s_cfg.u4_disp_wd,
1311                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0],
1312                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[1],
1313                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[2],
1314                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1315                             convert_uv_only);
1316             break;
1317 
1318         case IV_YUV_422ILE :
1319             pu1_y_buf_base =  (UWORD8 *)ps_proc->s_inp_buf.s_raw_buf.apv_bufs[0] + (i4_mb_x * MB_SIZE * 2)
1320                               + ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] * (i4_mb_y * MB_SIZE);
1321 
1322             ps_codec->pf_ih264e_fmt_conv_422i_to_420sp(
1323                             ps_proc->pu1_src_buf_luma,
1324                             ps_proc->pu1_src_buf_chroma,
1325                             ps_proc->pu1_src_buf_chroma + 1, pu1_y_buf_base,
1326                             ps_codec->s_cfg.u4_disp_wd, u2_num_rows,
1327                             ps_proc->i4_src_strd, ps_proc->i4_src_chroma_strd,
1328                             ps_proc->i4_src_chroma_strd,
1329                             ps_proc->s_inp_buf.s_raw_buf.au4_strd[0] >> 1);
1330             break;
1331 
1332         default:
1333             break;
1334     }
1335 
1336     if (u4_pad_right_sz && (ps_proc->i4_mb_x == 0))
1337     {
1338         UWORD32 u4_pad_wd, u4_pad_ht;
1339         u4_pad_wd = (UWORD32)(ps_proc->i4_src_strd - ps_codec->s_cfg.u4_disp_wd);
1340         u4_pad_wd = MIN(u4_pad_right_sz, u4_pad_wd);
1341         u4_pad_ht = MB_SIZE;
1342         if(ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1343             u4_pad_ht = MIN(MB_SIZE, (MB_SIZE - u4_pad_bottom_sz));
1344 
1345         ih264_pad_right_luma(
1346                         ps_proc->pu1_src_buf_luma + ps_codec->s_cfg.u4_disp_wd,
1347                         ps_proc->i4_src_strd, u4_pad_ht, u4_pad_wd);
1348 
1349         ih264_pad_right_chroma(
1350                         ps_proc->pu1_src_buf_chroma + ps_codec->s_cfg.u4_disp_wd,
1351                         ps_proc->i4_src_chroma_strd, u4_pad_ht / 2, u4_pad_wd);
1352     }
1353 
1354     /* pad bottom edge */
1355     if (u4_pad_bottom_sz && (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1) && ps_proc->i4_mb_x == 0)
1356     {
1357         ih264_pad_bottom(ps_proc->pu1_src_buf_luma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_strd,
1358                          ps_proc->i4_src_strd, ps_proc->i4_src_strd, u4_pad_bottom_sz);
1359 
1360         ih264_pad_bottom(ps_proc->pu1_src_buf_chroma + (MB_SIZE - u4_pad_bottom_sz) * ps_proc->i4_src_chroma_strd / 2,
1361                          ps_proc->i4_src_chroma_strd, ps_proc->i4_src_chroma_strd, (u4_pad_bottom_sz / 2));
1362     }
1363 
1364 
1365     /* packed mb coeff data */
1366     ps_proc->pv_mb_coeff_data = ((UWORD8 *)ps_proc->pv_pic_mb_coeff_data) + i4_mb_y * ps_codec->u4_size_coeff_data;
1367 
1368     /* packed mb header data */
1369     ps_proc->pv_mb_header_data = ((UWORD8 *)ps_proc->pv_pic_mb_header_data) + i4_mb_y * ps_codec->u4_size_header_data;
1370 
1371     /* slice index */
1372     ps_proc->i4_cur_slice_idx = ps_proc->pu1_slice_idx[i4_mb_y * ps_proc->i4_wd_mbs + i4_mb_x];
1373 
1374     /*********************************************************************/
1375     /* ih264e_init_quant_params() routine is called at the pic init level*/
1376     /* this would have initialized the qp.                               */
1377     /* TODO_LATER: currently it is assumed that quant params donot change*/
1378     /* across mb's. When they do calculate update ps_qp_params accordingly*/
1379     /*********************************************************************/
1380 
1381     /* init mv buffer ptr */
1382     ps_proc->ps_pu = ps_cur_mv_buf->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1383                      ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1384 
1385     /* Init co-located mv buffer */
1386     ps_proc->ps_colpu = ps_proc->aps_mv_buf[1]->ps_pic_pu + (i4_mb_y * ps_proc->i4_wd_mbs *
1387                         ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1388 
1389     if (i4_mb_y == 0)
1390     {
1391         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu;
1392     }
1393     else
1394     {
1395         ps_proc->ps_top_row_pu_ME = ps_cur_mv_buf->ps_pic_pu + ((i4_mb_y - 1) * ps_proc->i4_wd_mbs *
1396                                     ((MB_SIZE * MB_SIZE) / (ENC_MIN_PU_SIZE * ENC_MIN_PU_SIZE)));
1397     }
1398 
1399     ps_proc->pu4_mb_pu_cnt = ps_cur_mv_buf->pu4_mb_pu_cnt + (i4_mb_y * ps_proc->i4_wd_mbs);
1400 
1401     /* mb type */
1402     ps_proc->u4_mb_type = I16x16;
1403 
1404     /* lambda */
1405     ps_proc->u4_lambda = gu1_qp0[ps_qp_params->u1_mb_qp];
1406 
1407     /* mb distortion */
1408     ps_proc->i4_mb_distortion = SHRT_MAX;
1409 
1410     if (i4_mb_x == 0)
1411     {
1412         ps_proc->s_left_mb_syntax_ele.i4_mb_distortion = 0;
1413 
1414         ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion = 0;
1415 
1416         ps_proc->s_top_left_mb_syntax_ME.i4_mb_distortion = 0;
1417 
1418         if (i4_mb_y == 0)
1419         {
1420             memset(ps_proc->ps_top_row_mb_syntax_ele, 0, (ps_proc->i4_wd_mbs + 1)*sizeof(mb_info_t));
1421         }
1422     }
1423 
1424     /* mb cost */
1425     ps_proc->i4_mb_cost = INT_MAX;
1426 
1427     /**********************/
1428     /* init deblk context */
1429     /**********************/
1430     ps_deblk->i4_mb_x = ps_proc->i4_mb_x;
1431     /* deblk lags the current mb proc by 1 row */
1432     /* NOTE: Intra prediction has to happen with non deblocked samples used as reference */
1433     /* Hence to deblk MB 0 of row 0, you have wait till MB 0 of row 1 is encoded. */
1434     /* For simplicity, we chose to lag deblking by 1 Row wrt to proc */
1435     ps_deblk->i4_mb_y = ps_proc->i4_mb_y - 1;
1436 
1437     /* buffer ptrs */
1438     ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + i4_rec_strd * (ps_deblk->i4_mb_y * MB_SIZE);
1439     ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + i4_rec_strd * (ps_deblk->i4_mb_y * BLK8x8SIZE);
1440 
1441     /* init deblk bs context */
1442     /* mb indices */
1443     ps_bs->i4_mb_x = ps_proc->i4_mb_x;
1444     ps_bs->i4_mb_y = ps_proc->i4_mb_y;
1445 
1446     /* init n_mb_process  context */
1447     ps_n_mb_ctxt->i4_mb_x = 0;
1448     ps_n_mb_ctxt->i4_mb_y = ps_deblk->i4_mb_y;
1449     ps_n_mb_ctxt->i4_n_mbs = ps_proc->i4_nmb_ntrpy;
1450 
1451     return IH264E_SUCCESS;
1452 }
1453 
1454 /**
1455 *******************************************************************************
1456 *
1457 * @brief This function performs luma & chroma padding
1458 *
1459 * @par Description:
1460 *
1461 * @param[in] ps_proc
1462 *  Process context corresponding to the job
1463 *
1464 * @param[in] pu1_curr_pic_luma
1465 *  Pointer to luma buffer
1466 *
1467 * @param[in] pu1_curr_pic_chroma
1468 *  Pointer to chroma buffer
1469 *
1470 * @param[in] i4_mb_x
1471 *  mb index x
1472 *
1473 * @param[in] i4_mb_y
1474 *  mb index y
1475 *
1476 *  @param[in] i4_pad_ht
1477 *  number of rows to be padded
1478 *
1479 * @returns  error status
1480 *
1481 * @remarks none
1482 *
1483 *******************************************************************************
1484 */
ih264e_pad_recon_buffer(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y,WORD32 i4_pad_ht)1485 IH264E_ERROR_T ih264e_pad_recon_buffer(process_ctxt_t *ps_proc,
1486                                        UWORD8 *pu1_curr_pic_luma,
1487                                        UWORD8 *pu1_curr_pic_chroma,
1488                                        WORD32 i4_mb_x,
1489                                        WORD32 i4_mb_y,
1490                                        WORD32 i4_pad_ht)
1491 {
1492     /* codec context */
1493     codec_t *ps_codec = ps_proc->ps_codec;
1494 
1495     /* strides */
1496     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1497 
1498     if (i4_mb_x == 0)
1499     {
1500         /* padding left luma */
1501         ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, i4_pad_ht, PAD_LEFT);
1502 
1503         /* padding left chroma */
1504         ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, i4_pad_ht >> 1, PAD_LEFT);
1505     }
1506     if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1507     {
1508         /* padding right luma */
1509         ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, i4_pad_ht, PAD_RIGHT);
1510 
1511         /* padding right chroma */
1512         ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, i4_pad_ht >> 1, PAD_RIGHT);
1513 
1514         if (i4_mb_y == ps_proc->i4_ht_mbs - 1)
1515         {
1516             UWORD8 *pu1_rec_luma = pu1_curr_pic_luma + MB_SIZE + PAD_RIGHT + ((i4_pad_ht - 1) * i4_rec_strd);
1517             UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma + MB_SIZE + PAD_RIGHT + (((i4_pad_ht >> 1) - 1) * i4_rec_strd);
1518 
1519             /* padding bottom luma */
1520             ps_codec->pf_pad_bottom(pu1_rec_luma, i4_rec_strd, i4_rec_strd, PAD_BOT);
1521 
1522             /* padding bottom chroma */
1523             ps_codec->pf_pad_bottom(pu1_rec_chroma, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1524         }
1525     }
1526 
1527     if (i4_mb_y == 0)
1528     {
1529         UWORD8 *pu1_rec_luma = pu1_curr_pic_luma;
1530         UWORD8 *pu1_rec_chroma = pu1_curr_pic_chroma;
1531         WORD32 wd = MB_SIZE;
1532 
1533         if (i4_mb_x == 0)
1534         {
1535             pu1_rec_luma -= PAD_LEFT;
1536             pu1_rec_chroma -= PAD_LEFT;
1537 
1538             wd += PAD_LEFT;
1539         }
1540         if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1541         {
1542             wd += PAD_RIGHT;
1543         }
1544 
1545         /* padding top luma */
1546         ps_codec->pf_pad_top(pu1_rec_luma, i4_rec_strd, wd, PAD_TOP);
1547 
1548         /* padding top chroma */
1549         ps_codec->pf_pad_top(pu1_rec_chroma, i4_rec_strd, wd, (PAD_TOP >> 1));
1550     }
1551 
1552     return IH264E_SUCCESS;
1553 }
1554 
1555 
1556 
1557 
1558 /**
1559 *******************************************************************************
1560 *
1561 * @brief This function performs deblocking, padding and halfpel generation for
1562 *  'n' MBs
1563 *
1564 * @par Description:
1565 *
1566 * @param[in] ps_proc
1567 *  Process context corresponding to the job
1568 *
1569 * @param[in] pu1_curr_pic_luma
1570 * Current MB being processed(Luma)
1571 *
1572 * @param[in] pu1_curr_pic_chroma
1573 * Current MB being processed(Chroma)
1574 *
1575 * @param[in] i4_mb_x
1576 * Column value of current MB processed
1577 *
1578 * @param[in] i4_mb_y
* Current row processed
1580 *
1581 * @returns  error status
1582 *
1583 * @remarks none
1584 *
1585 *******************************************************************************
1586 */
ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t * ps_proc,UWORD8 * pu1_curr_pic_luma,UWORD8 * pu1_curr_pic_chroma,WORD32 i4_mb_x,WORD32 i4_mb_y)1587 IH264E_ERROR_T ih264e_dblk_pad_hpel_processing_n_mbs(process_ctxt_t *ps_proc,
1588                                                      UWORD8 *pu1_curr_pic_luma,
1589                                                      UWORD8 *pu1_curr_pic_chroma,
1590                                                      WORD32 i4_mb_x,
1591                                                      WORD32 i4_mb_y)
1592 {
1593     /* codec context */
1594     codec_t *ps_codec = ps_proc->ps_codec;
1595 
1596     /* n_mb processing context */
1597     n_mb_process_ctxt_t *ps_n_mb_ctxt = &ps_proc->s_n_mb_ctxt;
1598 
1599     /* deblk context */
1600     deblk_ctxt_t *ps_deblk = &ps_proc->s_deblk_ctxt;
1601 
1602     /* strides */
1603     WORD32 i4_rec_strd = ps_proc->i4_rec_strd;
1604 
1605     /* loop variables */
1606     WORD32 row, i, j, col;
1607 
1608     /* Padding Width */
1609     UWORD32 u4_pad_wd;
1610 
1611     /* deblk_map of the row being deblocked */
1612     UWORD8 *pu1_deblk_map = ps_proc->pu1_deblk_map + ps_deblk->i4_mb_y * ps_proc->i4_wd_mbs;
1613 
1614     /* deblk_map_previous row */
1615     UWORD8 *pu1_deblk_map_prev_row = pu1_deblk_map - ps_proc->i4_wd_mbs;
1616 
1617     WORD32 u4_pad_top = 0;
1618 
1619     WORD32 u4_deblk_prev_row = 0;
1620 
1621     /* Number of mbs to be processed */
1622     WORD32 i4_n_mbs = ps_n_mb_ctxt->i4_n_mbs;
1623 
1624     /* Number of mbs  actually processed
1625      * (at the end of a row, when remaining number of MBs are less than i4_n_mbs) */
1626     WORD32 i4_n_mb_process_count = 0;
1627 
1628     UWORD8 *pu1_pad_bottom_src = NULL;
1629 
1630     UWORD8 *pu1_pad_src_luma = NULL;
1631     UWORD8 *pu1_pad_src_chroma = NULL;
1632 
1633     if (ps_proc->u4_disable_deblock_level == 1)
1634     {
1635         /* If left most MB is processed, then pad left */
1636         if (i4_mb_x == 0)
1637         {
1638             /* padding left luma */
1639             ps_codec->pf_pad_left_luma(pu1_curr_pic_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1640 
1641             /* padding left chroma */
1642             ps_codec->pf_pad_left_chroma(pu1_curr_pic_chroma, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1643         }
1644         /*last col*/
1645         if (i4_mb_x == (ps_proc->i4_wd_mbs - 1))
1646         {
1647             /* padding right luma */
1648             ps_codec->pf_pad_right_luma(pu1_curr_pic_luma + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1649 
1650             /* padding right chroma */
1651             ps_codec->pf_pad_right_chroma(pu1_curr_pic_chroma + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1652         }
1653     }
1654 
1655     if ((i4_mb_y > 0) || (i4_mb_y == (ps_proc->i4_ht_mbs - 1)))
1656     {
1657         /* if number of mb's to be processed are less than 'N', go back.
1658          * exception to the above clause is end of row */
1659         if ( ((i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1)) < i4_n_mbs) && (i4_mb_x < (ps_proc->i4_wd_mbs - 1)) )
1660         {
1661             return IH264E_SUCCESS;
1662         }
1663         else
1664         {
1665             i4_n_mb_process_count = MIN(i4_mb_x - (ps_n_mb_ctxt->i4_mb_x - 1), i4_n_mbs);
1666 
1667             /* performing deblocking for required number of MBs */
1668             if ((i4_mb_y > 0) && (ps_proc->u4_disable_deblock_level != 1))
1669             {
1670                 u4_deblk_prev_row = 1;
1671 
1672                 /* checking whether the top rows are deblocked */
1673                 for (col = 0; col < i4_n_mb_process_count; col++)
1674                 {
1675                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + col];
1676                 }
1677 
1678                 /* checking whether the top right MB is deblocked */
1679                 if ((ps_deblk->i4_mb_x + i4_n_mb_process_count) != ps_proc->i4_wd_mbs)
1680                 {
1681                     u4_deblk_prev_row &= pu1_deblk_map_prev_row[ps_deblk->i4_mb_x + i4_n_mb_process_count];
1682                 }
1683 
1684                 /* Top or Top right MBs not deblocked */
1685                 if ((u4_deblk_prev_row != 1) && (i4_mb_y > 0))
1686                 {
1687                     return IH264E_SUCCESS;
1688                 }
1689 
1690                 for (row = 0; row < i4_n_mb_process_count; row++)
1691                 {
1692                     ih264e_deblock_mb(ps_proc, ps_deblk);
1693 
1694                     pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1695 
1696                     if (ps_deblk->i4_mb_y > 0)
1697                     {
1698                         if (ps_deblk->i4_mb_x == 0)/* If left most MB is processed, then pad left*/
1699                         {
1700                             /* padding left luma */
1701                             ps_codec->pf_pad_left_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE, i4_rec_strd, MB_SIZE, PAD_LEFT);
1702 
1703                             /* padding left chroma */
1704                             ps_codec->pf_pad_left_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_LEFT);
1705                         }
1706 
1707                         if (ps_deblk->i4_mb_x == (ps_proc->i4_wd_mbs - 1))/*last column*/
1708                         {
1709                             /* padding right luma */
1710                             ps_codec->pf_pad_right_luma(ps_deblk->pu1_cur_pic_luma - i4_rec_strd * MB_SIZE + MB_SIZE, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1711 
1712                             /* padding right chroma */
1713                             ps_codec->pf_pad_right_chroma(ps_deblk->pu1_cur_pic_chroma - i4_rec_strd * BLK8x8SIZE + MB_SIZE, i4_rec_strd, MB_SIZE >> 1, PAD_RIGHT);
1714                         }
1715                     }
1716                     ps_deblk->i4_mb_x++;
1717 
1718                     ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1719                     ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1720 
1721                 }
1722             }
1723             else if(i4_mb_y > 0)
1724             {
1725                 ps_deblk->i4_mb_x += i4_n_mb_process_count;
1726 
1727                 ps_deblk->pu1_cur_pic_luma += i4_n_mb_process_count * MB_SIZE;
1728                 ps_deblk->pu1_cur_pic_chroma += i4_n_mb_process_count * MB_SIZE;
1729             }
1730 
1731             if (i4_mb_y == 2)
1732             {
1733                 u4_pad_wd = i4_n_mb_process_count * MB_SIZE;
1734                 u4_pad_top = ps_n_mb_ctxt->i4_mb_x * MB_SIZE;
1735 
1736                 if (ps_n_mb_ctxt->i4_mb_x == 0)
1737                 {
1738                     u4_pad_wd += PAD_LEFT;
1739                     u4_pad_top = -PAD_LEFT;
1740                 }
1741 
1742                 if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1743                 {
1744                     u4_pad_wd += PAD_RIGHT;
1745                 }
1746 
1747                 /* padding top luma */
1748                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_luma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, PAD_TOP);
1749 
1750                 /* padding top chroma */
1751                 ps_codec->pf_pad_top(ps_proc->pu1_rec_buf_chroma_base + u4_pad_top, i4_rec_strd, u4_pad_wd, (PAD_TOP >> 1));
1752             }
1753 
1754             ps_n_mb_ctxt->i4_mb_x += i4_n_mb_process_count;
1755 
1756             if (i4_mb_x == ps_proc->i4_wd_mbs - 1)
1757             {
1758                 if (ps_proc->i4_mb_y == ps_proc->i4_ht_mbs - 1)
1759                 {
1760                     /* Bottom Padding is done in one stretch for the entire width */
1761                     if (ps_proc->u4_disable_deblock_level != 1)
1762                     {
1763                         ps_deblk->pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * MB_SIZE;
1764 
1765                         ps_deblk->pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 1) * i4_rec_strd * BLK8x8SIZE;
1766 
1767                         ps_n_mb_ctxt->i4_mb_x = 0;
1768                         ps_n_mb_ctxt->i4_mb_y = ps_proc->i4_mb_y;
1769                         ps_deblk->i4_mb_x = 0;
1770                         ps_deblk->i4_mb_y = ps_proc->i4_mb_y;
1771 
1772                         /* update pic qp map (as update_proc_ctxt is still not called for the last MB) */
1773                         ps_proc->s_deblk_ctxt.s_bs_ctxt.pu1_pic_qp[(i4_mb_y * ps_proc->i4_wd_mbs) + i4_mb_x] = ps_proc->u4_mb_qp;
1774 
1775                         i4_n_mb_process_count = (ps_proc->i4_wd_mbs) % i4_n_mbs;
1776 
1777                         j = (ps_proc->i4_wd_mbs) / i4_n_mbs;
1778 
1779                         for (i = 0; i < j; i++)
1780                         {
1781                             for (col = 0; col < i4_n_mbs; col++)
1782                             {
1783                                 ih264e_deblock_mb(ps_proc, ps_deblk);
1784 
1785                                 pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1786 
1787                                 ps_deblk->i4_mb_x++;
1788                                 ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1789                                 ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1790                                 ps_n_mb_ctxt->i4_mb_x++;
1791                             }
1792                         }
1793 
1794                         for (col = 0; col < i4_n_mb_process_count; col++)
1795                         {
1796                             ih264e_deblock_mb(ps_proc, ps_deblk);
1797 
1798                             pu1_deblk_map[ps_deblk->i4_mb_x] = 1;
1799 
1800                             ps_deblk->i4_mb_x++;
1801                             ps_deblk->pu1_cur_pic_luma += MB_SIZE;
1802                             ps_deblk->pu1_cur_pic_chroma += MB_SIZE;
1803                             ps_n_mb_ctxt->i4_mb_x++;
1804                         }
1805 
1806                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd;
1807 
1808                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd;
1809 
1810                         /* padding left luma */
1811                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1812 
1813                         /* padding left chroma */
1814                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1815 
1816                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1817                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1818 
1819                         /* padding left luma */
1820                         ps_codec->pf_pad_left_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_LEFT);
1821 
1822                         /* padding left chroma */
1823                         ps_codec->pf_pad_left_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_LEFT);
1824 
1825                         pu1_pad_src_luma = ps_proc->pu1_rec_buf_luma_base + (ps_proc->i4_ht_mbs - 2) * MB_SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1826 
1827                         pu1_pad_src_chroma = ps_proc->pu1_rec_buf_chroma_base + (ps_proc->i4_ht_mbs - 2) * BLK8x8SIZE * i4_rec_strd + (ps_proc->i4_wd_mbs) * MB_SIZE;
1828 
1829                         /* padding right luma */
1830                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1831 
1832                         /* padding right chroma */
1833                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1834 
1835                         pu1_pad_src_luma += i4_rec_strd * MB_SIZE;
1836                         pu1_pad_src_chroma += i4_rec_strd * BLK8x8SIZE;
1837 
1838                         /* padding right luma */
1839                         ps_codec->pf_pad_right_luma(pu1_pad_src_luma, i4_rec_strd, MB_SIZE, PAD_RIGHT);
1840 
1841                         /* padding right chroma */
1842                         ps_codec->pf_pad_right_chroma(pu1_pad_src_chroma, i4_rec_strd, BLK8x8SIZE, PAD_RIGHT);
1843 
1844                     }
1845 
1846                     /* In case height is less than 2 MBs pad top */
1847                     if (ps_proc->i4_ht_mbs <= 2)
1848                     {
1849                         UWORD8 *pu1_pad_top_src;
1850                         /* padding top luma */
1851                         pu1_pad_top_src = ps_proc->pu1_rec_buf_luma_base - PAD_LEFT;
1852                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, PAD_TOP);
1853 
1854                         /* padding top chroma */
1855                         pu1_pad_top_src = ps_proc->pu1_rec_buf_chroma_base - PAD_LEFT;
1856                         ps_codec->pf_pad_top(pu1_pad_top_src, i4_rec_strd, i4_rec_strd, (PAD_TOP >> 1));
1857                     }
1858 
1859                     /* padding bottom luma */
1860                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_luma_base + ps_proc->i4_ht_mbs * MB_SIZE * i4_rec_strd - PAD_LEFT;
1861                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, PAD_BOT);
1862 
1863                     /* padding bottom chroma */
1864                     pu1_pad_bottom_src = ps_proc->pu1_rec_buf_chroma_base + ps_proc->i4_ht_mbs * (MB_SIZE >> 1) * i4_rec_strd - PAD_LEFT;
1865                     ps_codec->pf_pad_bottom(pu1_pad_bottom_src, i4_rec_strd, i4_rec_strd, (PAD_BOT >> 1));
1866                 }
1867             }
1868         }
1869     }
1870 
1871     return IH264E_SUCCESS;
1872 }
1873 
1874 
1875 /**
1876 *******************************************************************************
1877 *
1878 * @brief This function performs luma & chroma core coding for a set of mb's.
1879 *
1880 * @par Description:
1881 *  The mb to be coded is taken and is evaluated over a predefined set of modes
1882 *  (intra (i16, i4, i8)/inter (mv, skip)) for best cost. The mode with least cost
1883 *  is selected and using intra/inter prediction filters, prediction is carried out.
1884 *  The deviation between src and pred signal constitutes error signal. This error
1885 *  signal is transformed (hierarchical transform if necessary) and quantized. The
1886 *  quantized residue is packed in to entropy buffer for entropy coding. This is
1887 *  repeated for all the mb's enlisted under the job.
1888 *
1889 * @param[in] ps_proc
1890 *  Process context corresponding to the job
1891 *
1892 * @returns  error status
1893 *
1894 * @remarks none
1895 *
1896 *******************************************************************************
1897 */
ih264e_process(process_ctxt_t * ps_proc)1898 WORD32 ih264e_process(process_ctxt_t *ps_proc)
1899 {
1900     /* error status */
1901     WORD32 error_status = IH264_SUCCESS;
1902 
1903     /* codec context */
1904     codec_t *ps_codec = ps_proc->ps_codec;
1905 
1906     /* cbp luma, chroma */
1907     UWORD32 u4_cbp_l, u4_cbp_c;
1908 
1909     /* width in mbs */
1910     WORD32 i4_wd_mbs = ps_proc->i4_wd_mbs;
1911 
1912     /* loop var */
1913     WORD32  i4_mb_idx, i4_mb_cnt = ps_proc->i4_mb_cnt;
1914 
1915     /* valid modes */
1916     UWORD32 u4_valid_modes = 0;
1917 
1918     /* gate threshold */
1919     WORD32 i4_gate_threshold = 0;
1920 
1921     /* is intra */
1922     WORD32 luma_idx, chroma_idx, is_intra;
1923 
1924     /* temp variables */
1925     WORD32 ctxt_sel = ps_proc->i4_encode_api_call_cnt % MAX_CTXT_SETS;
1926 
1927     /*
1928      * list of modes for evaluation
1929      * -------------------------------------------------------------------------
1930      * Note on enabling I4x4 and I16x16
1931      * At very low QP's the hadamard transform in I16x16 will push up the maximum
1932      * coeff value very high. CAVLC may not be able to represent the value and
1933      * hence the stream may not be decodable in some clips.
1934      * Hence at low QPs, we will enable I4x4 and disable I16x16 irrespective of preset.
1935      */
1936     if (ps_proc->i4_slice_type == ISLICE)
1937     {
1938         if (ps_proc->u4_frame_qp > 10)
1939         {
1940             /* enable intra 16x16 */
1941             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1942 
1943             /* enable intra 8x8 */
1944             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_8x8 ? (1 << I8x8) : 0;
1945         }
1946 
1947         /* enable intra 4x4 */
1948         u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1949         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1950 
1951     }
1952     else if (ps_proc->i4_slice_type == PSLICE)
1953     {
1954         if (ps_proc->u4_frame_qp > 10)
1955         {
1956             /* enable intra 16x16 */
1957             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1958         }
1959 
1960         /* enable intra 4x4 */
1961         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1962         {
1963             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1964         }
1965         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1966 
1967         /* enable inter P16x16 */
1968         u4_valid_modes |= (1 << P16x16);
1969     }
1970     else if (ps_proc->i4_slice_type == BSLICE)
1971     {
1972         if (ps_proc->u4_frame_qp > 10)
1973         {
1974             /* enable intra 16x16 */
1975             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_16x16 ? (1 << I16x16) : 0;
1976         }
1977 
1978         /* enable intra 4x4 */
1979         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
1980         {
1981             u4_valid_modes |= ps_codec->s_cfg.u4_enable_intra_4x4 ? (1 << I4x4) : 0;
1982         }
1983         u4_valid_modes |= (ps_proc->u4_frame_qp <= 10) << I4x4;
1984 
1985         /* enable inter B16x16 */
1986         u4_valid_modes |= (1 << B16x16);
1987     }
1988 
1989 
1990     /* init entropy */
1991     ps_proc->s_entropy.i4_mb_x = ps_proc->i4_mb_x;
1992     ps_proc->s_entropy.i4_mb_y = ps_proc->i4_mb_y;
1993     ps_proc->s_entropy.i4_mb_cnt = MIN(ps_proc->i4_nmb_ntrpy, i4_wd_mbs - ps_proc->i4_mb_x);
1994 
1995     /* compute recon when :
1996      *   1. current frame is to be used as a reference
1997      *   2. dump recon for bit stream sanity check
1998      */
1999     ps_proc->u4_compute_recon = ps_codec->u4_is_curr_frm_ref ||
2000                                 ps_codec->s_cfg.u4_enable_recon;
2001 
2002     /* Encode 'n' macroblocks,
2003      * 'n' being the number of mbs dictated by current proc ctxt */
2004     for (i4_mb_idx = 0; i4_mb_idx < i4_mb_cnt; i4_mb_idx ++)
2005     {
2006         /* since we have not yet found sad, we have not yet got min sad */
2007         /* we need to initialize these variables for each MB */
2008         /* TODO how to get the min sad into the codec */
2009         ps_proc->u4_min_sad = ps_codec->s_cfg.i4_min_sad;
2010         ps_proc->u4_min_sad_reached = 0;
2011 
2012         /* mb analysis */
2013         {
2014             /* temp var */
2015             WORD32 i4_mb_id = ps_proc->i4_mb_x + ps_proc->i4_mb_y * i4_wd_mbs;
2016 
2017             /* force intra refresh ? */
2018             WORD32 i4_air_enable_inter = (ps_codec->s_cfg.e_air_mode == IVE_AIR_MODE_NONE) ||
2019                             (ps_codec->pu2_intr_rfrsh_map[i4_mb_id] != ps_codec->i4_air_pic_cnt);
2020 
2021             /* evaluate inter 16x16 modes */
2022             if ((u4_valid_modes & (1 << P16x16)) || (u4_valid_modes & (1 << B16x16)))
2023             {
2024                 /* compute nmb me */
2025                 if (ps_proc->i4_mb_x % ps_proc->u4_nmb_me == 0)
2026                 {
2027                     ih264e_compute_me_nmb(ps_proc, MIN((WORD32)ps_proc->u4_nmb_me,
2028                                                        i4_wd_mbs - ps_proc->i4_mb_x));
2029                 }
2030 
2031                 /* set pointers to ME data appropriately for other modules to use */
2032                 {
2033                     UWORD32 u4_mb_index = ps_proc->i4_mb_x % ps_proc->u4_nmb_me ;
2034 
2035                     /* get the min sad condition for current mb */
2036                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2037                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2038 
2039                     ps_proc->ps_skip_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_skip_mv[0]);
2040                     ps_proc->ps_ngbr_avbl = &(ps_proc->ps_nmb_info[u4_mb_index].s_ngbr_avbl);
2041                     ps_proc->ps_pred_mv = &(ps_proc->ps_nmb_info[u4_mb_index].as_pred_mv[0]);
2042 
2043                     ps_proc->i4_mb_distortion = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_distortion;
2044                     ps_proc->i4_mb_cost = ps_proc->ps_nmb_info[u4_mb_index].i4_mb_cost;
2045                     ps_proc->u4_min_sad = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad;
2046                     ps_proc->u4_min_sad_reached = ps_proc->ps_nmb_info[u4_mb_index].u4_min_sad_reached;
2047                     ps_proc->u4_mb_type = ps_proc->ps_nmb_info[u4_mb_index].u4_mb_type;
2048 
2049                     /* get the best sub pel buffer */
2050                     ps_proc->pu1_best_subpel_buf = ps_proc->ps_nmb_info[u4_mb_index].pu1_best_sub_pel_buf;
2051                     ps_proc->u4_bst_spel_buf_strd = ps_proc->ps_nmb_info[u4_mb_index].u4_bst_spel_buf_strd;
2052                 }
2053                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2054             }
2055             else
2056             {
2057                 /* Derive neighbor availability for the current macroblock */
2058                 ps_proc->ps_ngbr_avbl = &ps_proc->s_ngbr_avbl;
2059 
2060                 ih264e_derive_nghbr_avbl_of_mbs(ps_proc);
2061             }
2062 
2063             /*
2064              * If air says intra, we need to force the following code path to evaluate intra
2065              * The easy way is just to say that the inter cost is too much
2066              */
2067             if (!i4_air_enable_inter)
2068             {
2069                 ps_proc->u4_min_sad_reached = 0;
2070                 ps_proc->i4_mb_cost = INT_MAX;
2071                 ps_proc->i4_mb_distortion = INT_MAX;
2072             }
2073             else if (ps_proc->u4_mb_type == PSKIP)
2074             {
2075                 goto UPDATE_MB_INFO;
2076             }
2077 
2078             /* wait until the proc of [top + 1] mb is computed.
2079              * We wait till the proc dependencies are satisfied */
2080              if(ps_proc->i4_mb_y > 0)
2081              {
2082                 /* proc map */
2083                 UWORD8  *pu1_proc_map_top;
2084 
2085                 pu1_proc_map_top = ps_proc->pu1_proc_map + ((ps_proc->i4_mb_y - 1) * i4_wd_mbs);
2086 
2087                 while (1)
2088                 {
2089                     volatile UWORD8 *pu1_buf;
2090                     WORD32 idx = i4_mb_idx + 1;
2091 
2092                     idx = MIN(idx, ((WORD32)ps_codec->s_cfg.i4_wd_mbs - 1));
2093                     pu1_buf =  pu1_proc_map_top + idx;
2094                     if(*pu1_buf)
2095                         break;
2096                     ithread_yield();
2097                 }
2098             }
2099 
2100             /* If we already have the minimum sad, there is no point in searching for sad again */
2101             if (ps_proc->u4_min_sad_reached == 0 || ps_codec->s_cfg.u4_enc_speed_preset != IVE_FASTEST)
2102             {
2103                 /* intra gating in inter slices */
2104                 /* No need of gating if we want to force intra, we need to find the threshold only if inter is enabled by AIR*/
2105                 if (i4_air_enable_inter && ps_proc->i4_slice_type != ISLICE && ps_codec->u4_inter_gate)
2106                 {
2107                     /* distortion of neighboring blocks */
2108                     WORD32 i4_distortion[4];
2109 
2110                     i4_distortion[0] = ps_proc->s_left_mb_syntax_ele.i4_mb_distortion;
2111 
2112                     i4_distortion[1] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x].i4_mb_distortion;
2113 
2114                     i4_distortion[2] = ps_proc->ps_top_row_mb_syntax_ele[ps_proc->i4_mb_x + 1].i4_mb_distortion;
2115 
2116                     i4_distortion[3] = ps_proc->s_top_left_mb_syntax_ele.i4_mb_distortion;
2117 
2118                     i4_gate_threshold = (i4_distortion[0] + i4_distortion[1] + i4_distortion[2] + i4_distortion[3]) >> 2;
2119 
2120                 }
2121 
2122 
2123                 /* If we are going to force intra we need to evaluate intra irrespective of gating */
2124                 if ( (!i4_air_enable_inter) || ((i4_gate_threshold + 16 *((WORD32) ps_proc->u4_lambda)) < ps_proc->i4_mb_distortion))
2125                 {
2126                     /* evaluate intra 4x4 modes */
2127                     if (u4_valid_modes & (1 << I4x4))
2128                     {
2129                         if (ps_codec->s_cfg.u4_enc_speed_preset == IVE_SLOWEST)
2130                         {
2131                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdopton(ps_proc);
2132                         }
2133                         else
2134                         {
2135                             ih264e_evaluate_intra4x4_modes_for_least_cost_rdoptoff(ps_proc);
2136                         }
2137                     }
2138 
2139                     /* evaluate intra 16x16 modes */
2140                     if (u4_valid_modes & (1 << I16x16))
2141                     {
2142                         ih264e_evaluate_intra16x16_modes_for_least_cost_rdoptoff(ps_proc);
2143                     }
2144 
2145                     /* evaluate intra 8x8 modes */
2146                     if (u4_valid_modes & (1 << I8x8))
2147                     {
2148                         ih264e_evaluate_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2149                     }
2150 
2151                 }
2152         }
2153      }
2154 
2155         /* is intra */
2156         if (ps_proc->u4_mb_type == I4x4 || ps_proc->u4_mb_type == I16x16 || ps_proc->u4_mb_type == I8x8)
2157         {
2158             luma_idx = ps_proc->u4_mb_type;
2159             chroma_idx = 0;
2160             is_intra = 1;
2161 
2162             /* evaluate chroma blocks for intra */
2163             ih264e_evaluate_chroma_intra8x8_modes_for_least_cost_rdoptoff(ps_proc);
2164         }
2165         else
2166         {
2167             luma_idx = 3;
2168             chroma_idx = 1;
2169             is_intra = 0;
2170         }
2171         ps_proc->u4_is_intra = is_intra;
2172         ps_proc->ps_pu->b1_intra_flag = is_intra;
2173 
2174         /* redo MV pred of neighbors in the case intra mb */
2175         /* TODO : currently called unconditionally, needs to be called only in the case of intra
2176          * to modify neighbors */
2177         if (ps_proc->i4_slice_type != ISLICE)
2178         {
2179             ih264e_mv_pred(ps_proc, ps_proc->i4_slice_type);
2180         }
2181 
2182         /* Perform luma mb core coding */
2183         u4_cbp_l = (ps_codec->luma_energy_compaction)[luma_idx](ps_proc);
2184 
2185         /* Perform luma mb core coding */
2186         u4_cbp_c = (ps_codec->chroma_energy_compaction)[chroma_idx](ps_proc);
2187 
2188         /* coded block pattern */
2189         ps_proc->u4_cbp = (u4_cbp_c << 4) | u4_cbp_l;
2190 
2191         if (!ps_proc->u4_is_intra)
2192         {
2193             if (ps_proc->i4_slice_type == BSLICE)
2194             {
2195                 if (ih264e_find_bskip_params(ps_proc, PRED_L0))
2196                 {
2197                     ps_proc->u4_mb_type = (ps_proc->u4_cbp) ? BDIRECT : BSKIP;
2198                 }
2199             }
2200             else if(!ps_proc->u4_cbp)
2201             {
2202                 if (ih264e_find_pskip_params(ps_proc, PRED_L0))
2203                 {
2204                     ps_proc->u4_mb_type = PSKIP;
2205                 }
2206             }
2207         }
2208 
2209 UPDATE_MB_INFO:
2210 
2211         /* Update mb sad, mb qp and intra mb cost. Will be used by rate control */
2212         ih264e_update_rc_mb_info(&ps_proc->s_frame_info, ps_proc);
2213 
2214         /**********************************************************************/
2215         /* if disable deblock level is '0' this implies enable deblocking for */
2216         /* all edges of all macroblocks with out any restrictions             */
2217         /*                                                                    */
2218         /* if disable deblock level is '1' this implies disable deblocking for*/
2219         /* all edges of all macroblocks with out any restrictions             */
2220         /*                                                                    */
2221         /* if disable deblock level is '2' this implies enable deblocking for */
2222         /* all edges of all macroblocks except edges overlapping with slice   */
2223         /* boundaries. This option is not currently supported by the encoder  */
2224         /* hence the slice map should be of no significance to perform debloc */
2225         /* king                                                               */
2226         /**********************************************************************/
2227 
2228         if (ps_proc->u4_compute_recon)
2229         {
2230             /* deblk context */
2231             /* src pointers */
2232             UWORD8 *pu1_cur_pic_luma = ps_proc->pu1_rec_buf_luma;
2233             UWORD8 *pu1_cur_pic_chroma = ps_proc->pu1_rec_buf_chroma;
2234 
2235             /* src indices */
2236             UWORD32 i4_mb_x = ps_proc->i4_mb_x;
2237             UWORD32 i4_mb_y = ps_proc->i4_mb_y;
2238 
2239             /* compute blocking strength */
2240             if (ps_proc->u4_disable_deblock_level != 1)
2241             {
2242                 ih264e_compute_bs(ps_proc);
2243             }
2244 
2245             /* nmb deblocking and hpel and padding */
2246             ih264e_dblk_pad_hpel_processing_n_mbs(ps_proc, pu1_cur_pic_luma,
2247                                                   pu1_cur_pic_chroma, i4_mb_x,
2248                                                   i4_mb_y);
2249         }
2250 
2251         /* update the context after for coding next mb */
2252         error_status |= ih264e_update_proc_ctxt(ps_proc);
2253 
2254         /* Once the last row is processed, mark the buffer status appropriately */
2255         if (ps_proc->i4_ht_mbs == ps_proc->i4_mb_y)
2256         {
2257             /* Pointer to current picture buffer structure */
2258             pic_buf_t *ps_cur_pic = ps_proc->ps_cur_pic;
2259 
2260             /* Pointer to current picture's mv buffer structure */
2261             mv_buf_t *ps_cur_mv_buf = ps_proc->ps_cur_mv_buf;
2262 
2263             /**********************************************************************/
2264             /* if disable deblock level is '0' this implies enable deblocking for */
2265             /* all edges of all macroblocks with out any restrictions             */
2266             /*                                                                    */
2267             /* if disable deblock level is '1' this implies disable deblocking for*/
2268             /* all edges of all macroblocks with out any restrictions             */
2269             /*                                                                    */
2270             /* if disable deblock level is '2' this implies enable deblocking for */
2271             /* all edges of all macroblocks except edges overlapping with slice   */
2272             /* boundaries. This option is not currently supported by the encoder  */
2273             /* hence the slice map should be of no significance to perform debloc */
2274             /* king                                                               */
2275             /**********************************************************************/
2276             error_status |= ih264_buf_mgr_release(ps_codec->pv_mv_buf_mgr, ps_cur_mv_buf->i4_buf_id , BUF_MGR_CODEC);
2277 
2278             error_status |= ih264_buf_mgr_release(ps_codec->pv_ref_buf_mgr, ps_cur_pic->i4_buf_id , BUF_MGR_CODEC);
2279 
2280             if (ps_codec->s_cfg.u4_enable_recon)
2281             {
2282                 /* pic cnt */
2283                 ps_codec->as_rec_buf[ctxt_sel].i4_pic_cnt = ps_proc->i4_pic_cnt;
2284 
2285                 /* rec buffers */
2286                 ps_codec->as_rec_buf[ctxt_sel].s_pic_buf  = *ps_proc->ps_cur_pic;
2287 
2288                 /* is last? */
2289                 ps_codec->as_rec_buf[ctxt_sel].u4_is_last = ps_proc->s_entropy.u4_is_last;
2290 
2291                 /* frame time stamp */
2292                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_high = ps_proc->s_entropy.u4_timestamp_high;
2293                 ps_codec->as_rec_buf[ctxt_sel].u4_timestamp_low = ps_proc->s_entropy.u4_timestamp_low;
2294             }
2295 
2296         }
2297     }
2298 
2299     DEBUG_HISTOGRAM_DUMP(ps_codec->s_cfg.i4_ht_mbs == ps_proc->i4_mb_y);
2300 
2301     return error_status;
2302 }
2303 
2304 /**
2305 *******************************************************************************
2306 *
2307 * @brief
2308 *  Function to update rc context after encoding
2309 *
2310 * @par   Description
2311 *  This function updates the rate control context after the frame is encoded.
2312 *  Number of bits consumed by the current frame, frame distortion, frame cost,
2313 *  number of intra/inter mb's, ... are passed on to rate control context for
2314 *  updating the rc model.
2315 *
2316 * @param[in] ps_codec
2317 *  Handle to codec context
2318 *
2319 * @param[in] ctxt_sel
2320 *  frame context selector
2321 *
2322 * @param[in] pic_cnt
2323 *  pic count
2324 *
2325 * @returns i4_stuffing_byte
2326 *  number of stuffing bytes (if necessary)
2327 *
2328 * @remarks
2329 *
2330 *******************************************************************************
2331 */
ih264e_update_rc_post_enc(codec_t * ps_codec,WORD32 ctxt_sel,WORD32 i4_is_first_frm)2332 WORD32 ih264e_update_rc_post_enc(codec_t *ps_codec, WORD32 ctxt_sel, WORD32 i4_is_first_frm)
2333 {
2334     /* proc set base idx */
2335     WORD32 i4_proc_ctxt_sel_base = ctxt_sel ? (MAX_PROCESS_CTXT / 2) : 0;
2336 
2337     /* proc ctxt */
2338     process_ctxt_t *ps_proc = &ps_codec->as_process[i4_proc_ctxt_sel_base];
2339 
2340     /* frame qp */
2341     UWORD8 u1_frame_qp = ps_codec->u4_frame_qp;
2342 
2343     /* cbr rc return status */
2344     WORD32 i4_stuffing_byte = 0;
2345 
2346     /* current frame stats */
2347     frame_info_t s_frame_info;
2348     picture_type_e rc_pic_type;
2349 
2350     /* temp var */
2351     WORD32 i, j;
2352 
2353     /********************************************************************/
2354     /*                            BEGIN INIT                            */
2355     /********************************************************************/
2356 
2357     /* init frame info */
2358     irc_init_frame_info(&s_frame_info);
2359 
2360     /* get frame info */
2361     for (i = 0; i < (WORD32)ps_codec->s_cfg.u4_num_cores; i++)
2362     {
2363         /*****************************************************************/
2364         /* One frame can be encoded by max of u4_num_cores threads       */
2365         /* Accumulating the num mbs, sad, qp and intra_mb_cost from      */
2366         /* u4_num_cores threads                                          */
2367         /*****************************************************************/
2368         for (j = 0; j< MAX_MB_TYPE; j++)
2369         {
2370             s_frame_info.num_mbs[j] += ps_proc[i].s_frame_info.num_mbs[j];
2371 
2372             s_frame_info.tot_mb_sad[j] += ps_proc[i].s_frame_info.tot_mb_sad[j];
2373 
2374             s_frame_info.qp_sum[j] += ps_proc[i].s_frame_info.qp_sum[j];
2375         }
2376 
2377         s_frame_info.intra_mb_cost_sum += ps_proc[i].s_frame_info.intra_mb_cost_sum;
2378 
2379         s_frame_info.activity_sum += ps_proc[i].s_frame_info.activity_sum;
2380 
2381         /*****************************************************************/
2382         /* gather number of residue and header bits consumed by the frame*/
2383         /*****************************************************************/
2384         ih264e_update_rc_bits_info(&s_frame_info, &ps_proc[i].s_entropy);
2385     }
2386 
2387     /* get pic type */
2388     switch (ps_codec->pic_type)
2389     {
2390         case PIC_I:
2391         case PIC_IDR:
2392             rc_pic_type = I_PIC;
2393             break;
2394         case PIC_P:
2395             rc_pic_type = P_PIC;
2396             break;
2397         case PIC_B:
2398             rc_pic_type = B_PIC;
2399             break;
2400         default:
2401             assert(0);
2402             break;
2403     }
2404 
2405     /* update rc lib with current frame stats */
2406     i4_stuffing_byte =  ih264e_rc_post_enc(ps_codec->s_rate_control.pps_rate_control_api,
2407                                           &(s_frame_info),
2408                                           ps_codec->s_rate_control.pps_pd_frm_rate,
2409                                           ps_codec->s_rate_control.pps_time_stamp,
2410                                           ps_codec->s_rate_control.pps_frame_time,
2411                                           (ps_proc->i4_wd_mbs * ps_proc->i4_ht_mbs),
2412                                           &rc_pic_type,
2413                                           i4_is_first_frm,
2414                                           &ps_codec->s_rate_control.post_encode_skip[ctxt_sel],
2415                                           u1_frame_qp,
2416                                           &ps_codec->s_rate_control.num_intra_in_prev_frame,
2417                                           &ps_codec->s_rate_control.i4_avg_activity);
2418     return i4_stuffing_byte;
2419 }
2420 
2421 /**
2422 *******************************************************************************
2423 *
2424 * @brief
2425 *  entry point of a spawned encoder thread
2426 *
2427 * @par Description:
2428 *  The encoder thread dequeues a proc/entropy job from the encoder queue and
2429 *  calls necessary routines.
2430 *
2431 * @param[in] pv_proc
2432 *  Process context corresponding to the thread
2433 *
2434 * @returns  error status
2435 *
2436 * @remarks
2437 *
2438 *******************************************************************************
2439 */
ih264e_process_thread(void * pv_proc)2440 WORD32 ih264e_process_thread(void *pv_proc)
2441 {
2442     /* error status */
2443     IH264_ERROR_T ret = IH264_SUCCESS;
2444     WORD32 error_status = IH264_SUCCESS;
2445 
2446     /* proc ctxt */
2447     process_ctxt_t *ps_proc = pv_proc;
2448 
2449     /* codec ctxt */
2450     codec_t *ps_codec = ps_proc->ps_codec;
2451 
2452     /* structure to represent a processing job entry */
2453     job_t s_job;
2454 
2455     /* blocking call : entropy dequeue is non-blocking till all
2456      * the proc jobs are processed */
2457     WORD32 is_blocking = 0;
2458 
2459     /* set affinity */
2460     ithread_set_affinity(ps_proc->i4_id);
2461 
2462     while(1)
2463     {
2464         /* dequeue a job from the entropy queue */
2465         {
2466             int error = ithread_mutex_lock(ps_codec->pv_entropy_mutex);
2467 
2468             /* codec context selector */
2469             WORD32 ctxt_sel = ps_codec->i4_encode_api_call_cnt % MAX_CTXT_SETS;
2470 
2471             volatile UWORD32 *pu4_buf = &ps_codec->au4_entropy_thread_active[ctxt_sel];
2472 
2473             /* have the lock */
2474             if (error == 0)
2475             {
2476                 if (*pu4_buf == 0)
2477                 {
2478                     /* no entropy threads are active, try dequeuing a job from the entropy queue */
2479                     ret = ih264_list_dequeue(ps_proc->pv_entropy_jobq, &s_job, is_blocking);
2480                     if (IH264_SUCCESS == ret)
2481                     {
2482                         *pu4_buf = 1;
2483                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2484                         goto WORKER;
2485                     }
2486                     else if(is_blocking)
2487                     {
2488                         ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2489                         break;
2490                     }
2491                 }
2492                 ithread_mutex_unlock(ps_codec->pv_entropy_mutex);
2493             }
2494         }
2495 
2496         /* dequeue a job from the process queue */
2497         ret = ih264_list_dequeue(ps_proc->pv_proc_jobq, &s_job, 1);
2498         if (IH264_SUCCESS != ret)
2499         {
2500             if(ps_proc->i4_id)
2501                 break;
2502             else
2503             {
2504                 is_blocking = 1;
2505                 continue;
2506             }
2507         }
2508 
2509 WORKER:
2510         /* choose appropriate proc context based on proc_base_idx */
2511         ps_proc = &ps_codec->as_process[ps_proc->i4_id + s_job.i2_proc_base_idx];
2512 
2513         switch (s_job.i4_cmd)
2514         {
2515             case CMD_PROCESS:
2516                 ps_proc->i4_mb_cnt = s_job.i2_mb_cnt;
2517                 ps_proc->i4_mb_x = s_job.i2_mb_x;
2518                 ps_proc->i4_mb_y = s_job.i2_mb_y;
2519 
2520                 /* init process context */
2521                 ih264e_init_proc_ctxt(ps_proc);
2522 
2523                 /* core code all mbs enlisted under the current job */
2524                 error_status |= ih264e_process(ps_proc);
2525                 break;
2526 
2527             case CMD_ENTROPY:
2528                 ps_proc->s_entropy.i4_mb_x = s_job.i2_mb_x;
2529                 ps_proc->s_entropy.i4_mb_y = s_job.i2_mb_y;
2530                 ps_proc->s_entropy.i4_mb_cnt = s_job.i2_mb_cnt;
2531 
2532                 /* init entropy */
2533                 ih264e_init_entropy_ctxt(ps_proc);
2534 
2535                 /* entropy code all mbs enlisted under the current job */
2536                 error_status |= ih264e_entropy(ps_proc);
2537                 break;
2538 
2539             default:
2540                 error_status |= IH264_FAIL;
2541                 break;
2542         }
2543     }
2544 
2545     /* send error code */
2546     ps_proc->i4_error_code = error_status;
2547     return ret;
2548 }
2549