1 /* ------------------------------------------------------------------
2  * Copyright (C) 1998-2009 PacketVideo
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
13  * express or implied.
14  * See the License for the specific language governing permissions
15  * and limitations under the License.
16  * -------------------------------------------------------------------
17  */
18 /**
19 This file contains application function interfaces to the AVC encoder library
and necessary type definitions and enumerations.
21 @publishedAll
22 */
23 
24 #ifndef AVCENC_INT_H_INCLUDED
25 #define AVCENC_INT_H_INCLUDED
26 
27 #ifndef AVCINT_COMMON_H_INCLUDED
28 #include "avcint_common.h"
29 #endif
30 #ifndef AVCENC_API_H_INCLUDED
31 #include "avcenc_api.h"
32 #endif
33 
/* Floating-point type used by the rate-control structures declared in this header. */
typedef float OsclFloat;
35 
/* Size limits and defaults used by the structures below */
#define DEFAULT_ATTR    0 /* default memory attribute */
#define MAX_INPUT_FRAME 30 /* some arbitrary number, it can be much higher than this. */
#define MAX_REF_FRAME  16 /* max size of the RefPicList0 and RefPicList1 */
#define MAX_REF_PIC_LIST 33 /* NOTE(review): not referenced elsewhere in this header; exact meaning to be confirmed */
41 
/* Quantization parameter limits. */
#define MIN_QP          0
#define MAX_QP          51
#define SHIFT_QP        12

/* Number of fractional bits used for the fixed-point Lagrange multiplier. */
#define  LAMBDA_ACCURACY_BITS         16
/*
 * Convert a floating-point lambda to fixed point with LAMBDA_ACCURACY_BITS
 * fractional bits, rounding to nearest (+0.5 before truncation).
 * The argument is parenthesized so that expressions such as (a + b) are
 * scaled as a whole.
 */
#define  LAMBDA_FACTOR(lambda)        ((int)((double)(1<<LAMBDA_ACCURACY_BITS)*(lambda)+0.5))


#define DISABLE_THRESHOLDING  0
// for better R-D performance
#define _LUMA_COEFF_COST_       4 //!< threshold for luma coeffs
#define _CHROMA_COEFF_COST_     4 //!< threshold for chroma coeffs, used to be 7
#define _LUMA_MB_COEFF_COST_    5 //!< threshold for luma coeffs of inter Macroblocks
#define _LUMA_8x8_COEFF_COST_   5 //!< threshold for luma coeffs of 8x8 Inter Partition
#define MAX_VALUE       999999   //!< used for start value for some variables

/*
 * Cost helpers. All of them yield an integer cost:
 *   WEIGHTED_COST(factor, bits)  - (factor * bits) scaled back down by
 *                                  LAMBDA_ACCURACY_BITS.
 *   MV_COST(f, s, cx, cy, px, py) - weighted cost of the two table entries
 *                                  mvbits[(cx << s) - px] and mvbits[(cy << s) - py].
 *   MV_COST_S(f, cx, cy, px, py)  - same as MV_COST without the shift.
 * MV_COST and MV_COST_S require an indexable object named `mvbits` to be in
 * scope at the point of use. Every macro parameter is parenthesized so that
 * expression arguments (e.g. `px + 1`) are evaluated as a unit.
 */
#define  WEIGHTED_COST(factor,bits)   (((factor)*(bits))>>LAMBDA_ACCURACY_BITS)
#define  MV_COST(f,s,cx,cy,px,py)     (WEIGHTED_COST(f,mvbits[((cx)<<(s))-(px)]+mvbits[((cy)<<(s))-(py)]))
#define  MV_COST_S(f,cx,cy,px,py)     (WEIGHTED_COST(f,mvbits[(cx)-(px)]+mvbits[(cy)-(py)]))
60 
/* for sub-pel search and interpolation */
#define SUBPEL_PRED_BLK_SIZE 576 // 24x24; element count of AVCEncObject::subpel_pred
#define REF_CENTER 75
/* Sub-pel position identifiers. NOTE(review): names presumably encode the
   vertical (V) and horizontal (H) offset in quarter-pel units -- confirm. */
#define V2Q_H0Q 1
#define V0Q_H2Q 2
#define V2Q_H2Q 3
67 
68 /*
69 #define V3Q_H0Q 1
70 #define V3Q_H1Q 2
71 #define V0Q_H1Q 3
72 #define V1Q_H1Q 4
73 #define V1Q_H0Q 5
74 #define V1Q_H3Q 6
75 #define V0Q_H3Q 7
76 #define V3Q_H3Q 8
77 #define V2Q_H3Q 9
78 #define V2Q_H0Q 10
79 #define V2Q_H1Q 11
80 #define V2Q_H2Q 12
81 #define V3Q_H2Q 13
82 #define V0Q_H2Q 14
83 #define V1Q_H2Q 15
84 */
85 
86 
/* Default size for the overrun buffer (see the overrunBuffer / oBSize members below).
   NOTE(review): unit is presumably bytes -- confirm against the allocation code. */
#define DEFAULT_OVERRUN_BUFFER_SIZE 1000
88 
89 // associated with the above cost model
90 const uint8 COEFF_COST[2][16] =
91 {
92     {3, 2, 2, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
93     {9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}
94 };
95 
96 
97 
//! convert from H.263 QP to H.264 quant given by: quant=pow(2,QP/6)
//! Entry i holds 2^(i/6) rounded to the nearest integer, for i = 0..39.
//! Declared static so that each translation unit including this header gets
//! its own private copy instead of a duplicate external definition.
static const int QP2QUANT[40] =
{
    1, 1, 1, 1, 2, 2, 2, 2,
    3, 3, 3, 4, 4, 4, 5, 6,
    6, 7, 8, 9, 10, 11, 13, 14,
    16, 18, 20, 23, 25, 29, 32, 36,
    40, 45, 51, 57, 64, 72, 81, 91
};
107 
108 
/**
Internal encoder status. The enumerators are listed in the order in which the
encoding flow passes through them; the numeric values are consecutive from 0.
@publishedAll
*/
typedef enum
{
    AVCEnc_Initializing     = 0,
    AVCEnc_Encoding_SPS     = 1,
    AVCEnc_Encoding_PPS     = 2,
    AVCEnc_Analyzing_Frame  = 3,
    AVCEnc_WaitingForBuffer = 4,  /* pending state */
    AVCEnc_Encoding_Frame   = 5
} AVCEnc_State;
123 
/**
Bitstream structure contains bitstream related parameters such as the pointer
to the buffer, the current byte position and bit position. The content of the
bitstreamBuffer will be in EBSP format as the emulation prevention codes are
automatically inserted as the RBSP is recorded.
It also carries an optional overrun buffer and an untyped back-pointer to the
owning encoder object.
@publishedAll
*/
typedef struct tagEncBitstream
{
    uint8 *bitstreamBuffer; /* pointer to buffer memory   */
    int buf_size;       /* size of the buffer memory */
    int write_pos;      /* next position to write to bitstreamBuffer  */
    int count_zeros;   /* count of consecutive zeros (NOTE(review): presumably zero bytes, for emulation prevention -- confirm) */
    uint current_word;  /* byte-swapped (MSB left) current word to write to buffer */
    int bit_left;      /* number of bits left in current_word */
    uint8   *overrunBuffer;  /* extra output buffer to prevent current skip due to output buffer overrun*/
    int     oBSize;     /* size of allocated overrun buffer */
    void   *encvid; /* pointer to the main object (stored as void*; NOTE(review): presumably an AVCEncObject -- confirm) */

} AVCEncBitstream;
144 
145 /**
This structure is used for rate control. Other performance-related control
variables (RD cost, statistics, motion search data, etc.) should also be kept
in this structure.
149 @publishedAll
150 */
151 
152 
/* One rate-distortion sample record; MultiPass::pRDSamples holds arrays of these. */
typedef struct tagRDInfo
{
    int QP;             /* NOTE(review): presumably the quantization parameter of this sample -- confirm */
    int actual_bits;    /* NOTE(review): presumably bits produced at this QP -- confirm */
    OsclFloat mad;      /* NOTE(review): presumably mean absolute difference -- confirm */
    OsclFloat R_D;      /* NOTE(review): presumably a rate-distortion measure -- confirm */
} RDInfo;
160 
/*
 * Multipass rate control data: per-frame targets and results, pass counters,
 * a history of RD samples, bit-transfer bookkeeping, and the values needed
 * when the target bitrate or framerate is updated.
 */
typedef struct tagMultiPass
{
    /* multipass rate control data */
    int target_bits;    /* target bits for current frame, = rc->T */
    int actual_bits;    /* actual bits for current frame obtained after encoding, = rc->Rc*/
    int QP;             /* quantization level for current frame, = rc->Qc*/
    int prev_QP;        /* quantization level for previous frame */
    int prev_prev_QP;   /* quantization level for previous frame before last*/
    OsclFloat mad;          /* mad for current frame, = video->avgMAD*/
    int bitrate;        /* bitrate for current frame */
    OsclFloat framerate;    /* framerate for current frame*/

    int nRe_Quantized;  /* control variable for multipass encoding, */
    /* 0 : first pass */
    /* 1 : intermediate pass(quantization and VLC loop only) */
    /* 2 : final pass(de-quantization, idct, etc) */
    /* 3 : macroblock level rate control */

    int encoded_frames;     /* counter for all encoded frames */
    int re_encoded_frames;  /* counter for all multipass encoded frames*/
    int re_encoded_times;   /* counter for all times of multipass frame encoding */

    /* Multiple frame prediction*/
    RDInfo **pRDSamples;        /* pRDSamples[30][32], 30->30fps, 32 -> 5 bit quantizer, 32 candidates*/
    int framePos;               /* specific position in previous multiple frames*/
    int frameRange;             /* number of overall previous multiple frames */
    int samplesPerFrame[30];    /* number of samples per frame, 30->30fps */

    /* Bit allocation for scene change frames and high motion frames */
    OsclFloat sum_mad;
    int counter_BTsrc;  /* BT = Bit Transfer, bit transfer from low motion frames or less complicatedly compressed frames */
    int counter_BTdst;  /* BT = Bit Transfer, bit transfer to scene change frames or high motion frames or more complicatedly compressed frames */
    OsclFloat sum_QP;
    int diff_counter;   /* diff_counter = -diff_counter_BTdst, or diff_counter_BTsrc */

    /* For target bitrate or framerate update */
    OsclFloat target_bits_per_frame;        /* = C = bitrate/framerate */
    OsclFloat target_bits_per_frame_prev;   /* previous C */
    OsclFloat aver_mad;                     /* so-far average mad could replace sum_mad */
    OsclFloat aver_mad_prev;                /* previous average mad */
    int   overlapped_win_size;          /* transition period of time */
    int   encoded_frames_prev;          /* previous encoded_frames */
} MultiPass;
204 
205 
/*
 * Node of a doubly linked list (next/prev point to nodes of the same type).
 * AVCRateControl::end refers to such a list.
 */
typedef struct tagdataPointArray
{
    int Qp;         /* NOTE(review): presumably a quantization level -- confirm */
    int Rp;         /* NOTE(review): presumably a bit count -- confirm */
    OsclFloat Mp;   /* for MB-based RC */
    struct tagdataPointArray *next;
    struct tagdataPointArray *prev;
} dataPointArray;
214 
/*
 * Rate control state. The first part holds configuration taken from the
 * user's AVCEncParams; the remainder holds internal working variables,
 * including a section carried over from MPEG4 rate control and a section
 * for the "BX" (TMN8-like) rate control.
 */
typedef struct tagAVCRateControl
{

    /* these parameters are initialized by the users AVCEncParams */
    /* bitrate-robustness tradeoff */
    uint scdEnable; /* enable scene change detection */
    int idrPeriod;  /* IDR period in number of frames */
    int intraMBRate;   /* intra MB refresh rate per frame */
    uint dpEnable;  /* enable data partitioning */

    /* quality-complexity tradeoff */
    uint subPelEnable;  /* enable quarter pel search */
    int mvRange;    /* motion vector search range in +/- pixel */
    uint subMBEnable;  /* enable sub MB prediction mode (4x4, 4x8, 8x4) */
    uint rdOptEnable;  /* enable RD-opt mode selection */
    uint twoPass; /* flag for 2 pass encoding ( for future )*/
    uint bidirPred; /* bi-directional prediction for B-frame. */

    uint rcEnable;  /* enable rate control, '1' on, '0' const QP */
    int initQP; /* initial QP */

    /* note the following 3 params are for HRD, these triplets can be a series
    of triplets as the generalized HRD allows. SEI message must be generated in this case. */
    /* We no longer have to differentiate between CBR and VBR. The users to the
    AVC encoder lib will do the mapping from CBR/VBR to these parameters. */
    int32 bitRate;  /* target bit rate for the overall clip in bits/second*/
    int32 cpbSize;  /* coded picture buffer size in bytes */
    int32 initDelayOffset; /* initial CPB removal delay in bits */

    OsclFloat frame_rate; /* frame rate */
    int srcInterval; /* source frame interval in msec */
    int basicUnit;  /* number of macroblocks per BU */

    /* Then internal parameters for the operation */
    uint first_frame; /* a flag for the first frame */
    int lambda_mf; /* for example */
    int totalSAD;    /* SAD of current frame */

    /*******************************************/
    /* this part comes from MPEG4 rate control */
    int alpha;  /* weight for I frame */
    int Rs;     /*bit rate for the sequence (or segment) e.g., 24000 bits/sec */
    int Rc;     /*bits used for the current frame. It is the bit count obtained after encoding. */
    int Rp;     /*bits to be removed from the buffer per picture. */
    /*? is this the average one, or just the bits coded for the previous frame */
    int Rps;    /*bit to be removed from buffer per src frame */
    OsclFloat Ts;   /*number of seconds for the sequence  (or segment). e.g., 10 sec */
    OsclFloat Ep;
    OsclFloat Ec;   /*mean absolute difference for the current frame after motion compensation.*/
    /*If the macroblock is intra coded, the original spatial pixel values are summed.*/
    int Qc;     /*quantization level used for the current frame. */
    int Nr;     /*number of P frames remaining for encoding.*/
    int Rr; /*number of bits remaining for encoding this sequence (or segment).*/
    int Rr_Old;
    int T;      /*target bit to be used for the current frame.*/
    int S;      /*number of bits used for encoding the previous frame.*/
    int Hc; /*header and motion vector bits used in the current frame. It includes all the  information except to the residual information.*/
    int Hp; /*header and motion vector bits used in the previous frame. It includes all the     information except to the residual information.*/
    int Ql; /*quantization level used in the previous frame */
    int Bs; /*buffer size e.g., R/2 */
    int B;      /*current buffer level e.g., R/4 - start from the middle of the buffer */
    OsclFloat X1;
    OsclFloat X2;
    OsclFloat X11;
    OsclFloat M;            /*safe margin for the buffer */
    OsclFloat smTick;    /*ratio of src versus enc frame rate */
    double remnant;  /*remainder frame of src/enc frame for fine frame skipping */
    int timeIncRes; /* vol->timeIncrementResolution */

    dataPointArray   *end; /*quantization levels for the past (20) frames */

    int     frameNumber; /* ranging from 0 to 20 nodes*/
    int     w;
    int     Nr_Original;
    int     Nr_Old, Nr_Old2;
    int     skip_next_frame;
    int     Qdep;       /* smooth Q adjustment */
    int     VBR_Enabled;

    int totalFrameNumber; /* total coded frames, for debugging!!*/

    char    oFirstTime;

    int numFrameBits; /* keep track of number of bits of the current frame */
    int NumberofHeaderBits;
    int NumberofTextureBits;
    int numMBHeaderBits;
    int numMBTextureBits;
    double *MADofMB;    /* NOTE(review): presumably one MAD value per macroblock -- confirm */
    int32 bitsPerFrame;

    /* BX rate control, something like TMN8 rate control*/

    MultiPass *pMP;     /* multipass rate control data */

    int     TMN_W;
    int     TMN_TH;
    int     VBV_fullness;
    int     max_BitVariance_num; /* the number of the maximum bit variance within the given buffer with the unit of 10% of bitrate/framerate*/
    int     encoded_frames; /* counter for all encoded frames */
    int     low_bound;              /* bound for underflow detection, usually low_bound=-Bs/2, but could be changed in H.263 mode */
    int     VBV_fullness_offset;    /* offset of VBV_fullness, usually is zero, but can be changed in H.263 mode*/
    /* End BX */

} AVCRateControl;
320 
321 
/**
This structure holds the motion vector information: the two vector components
and an associated SAD value. */
typedef struct tagMV
{
    int x;      /* horizontal component (NOTE(review): units not shown here -- confirm) */
    int y;      /* vertical component */
    uint sad;   /* SAD stored with this vector */
} AVCMV;
330 
/**
This structure contains function pointers for different platform dependent implementation of
functions. Both members share the same signature: two uint8 pointers, an int and
an opaque void pointer, returning int. */
typedef struct tagAVCEncFuncPtr
{

    /* Four SAD routines for half-pel positions.
       NOTE(review): the mapping of index to half-pel position is not visible here -- confirm. */
    int (*SAD_MB_HalfPel[4])(uint8*, uint8*, int, void *);
    /* SAD routine for a macroblock; extra_info is an opaque pointer passed through to the implementation. */
    int (*SAD_Macroblock)(uint8 *ref, uint8 *blk, int dmin_lx, void *extra_info);

} AVCEncFuncPtr;
341 
/**
Information needed to perform padding correctly. The four integer members are
stored in the order i, width, j, height.
*/
typedef struct tagPadInfo
{
    int i, width;
    int j, height;
} AVCPadInfo;
352 
353 
#ifdef HTFM
/* Statistics for HTFM; only compiled when HTFM is defined.
   AVCEncObject embeds one instance as htfm_stat ("For statistics collection"). */
typedef struct tagHTFM_Stat
{
    int abs_dif_mad_avg;
    uint countbreak;
    int offsetArray[16];
    int offsetRef[16];
} HTFM_Stat;
#endif
363 
364 
/**
This structure is the main object for AVC encoder library providing access to all
global variables. It is allocated at PVAVCInitEncoder and freed at PVAVCCleanUpEncoder.
It groups the common codec object, the output bitstream, rate control, scratch
memory for coefficients and intra prediction, motion search state, timing
variables and platform-specific function pointers.
@publishedAll
*/
typedef struct tagEncObject
{

    AVCCommonObj *common;

    AVCEncBitstream     *bitstream; /* for current NAL */
    uint8   *overrunBuffer;  /* extra output buffer to prevent current skip due to output buffer overrun*/
    int     oBSize;     /* size of allocated overrun buffer */

    /* rate control */
    AVCRateControl      *rateCtrl; /* pointer to the rate control structure */

    /* encoding operation */
    AVCEnc_State        enc_state; /* encoding state */

    AVCFrameIO          *currInput; /* pointer to the current input frame */

    int                 currSliceGroup; /* currently encoded slice group id */

    int     level[24][16], run[24][16]; /* scratch memory */
    int     leveldc[16], rundc[16]; /* for DC component */
    int     levelcdc[16], runcdc[16]; /* for chroma DC component */
    int     numcoefcdc[2]; /* number of coefficient for chroma DC */
    int     numcoefdc;      /* number of coefficients for DC component */

    int     qp_const;
    int     qp_const_c;
    /********* intra prediction scratch memory **********************/
    uint8   pred_i16[AVCNumI16PredMode][256]; /* save prediction for MB */
    uint8   pred_i4[AVCNumI4PredMode][16];  /* save prediction for blk */
    uint8   pred_ic[AVCNumIChromaMode][128];  /* for 2 chroma */

    int     mostProbableI4Mode[16]; /* in raster scan order */
    /********* motion compensation related variables ****************/
    AVCMV   *mot16x16;          /* Saved motion vectors for 16x16 block*/
    AVCMV(*mot16x8)[2];     /* Saved motion vectors for 16x8 block*/
    AVCMV(*mot8x16)[2];     /* Saved motion vectors for 8x16 block*/
    AVCMV(*mot8x8)[4];      /* Saved motion vectors for 8x8 block*/

    /********* subpel position **************************************/
    uint32  subpel_pred[SUBPEL_PRED_BLK_SIZE/*<<2*/]; /* all 16 sub-pel positions  */
    uint8   *hpel_cand[9];      /* pointer to half-pel position */
    int     best_hpel_pos;          /* best position */
    uint8   qpel_cand[8][24*16];        /* 8 buffers of 24*16 bytes for quarter-pel position candidates */
    int     best_qpel_pos;
    uint8   *bilin_base[9][4];    /* pointer to 4 position at top left of bilinear quarter-pel */

    /* need for intra refresh rate */
    uint8   *intraSearch;       /* Intra Array for MBs to be intra searched */
    uint    firstIntraRefreshMBIndx; /* keep track for intra refresh */

    int     i4_sad;             /* temporary for i4 mode SAD */
    int     *min_cost;          /* Minimum cost for the all MBs */
    int     lambda_mode;        /* Lagrange parameter for mode selection */
    int     lambda_motion;      /* Lagrange parameter for MV selection */

    uint8   *mvbits_array;      /* Table for bits spent in the cost function */
    uint8   *mvbits;            /* An offset to the above array. */

    /* to speedup the SAD calculation */
    void *sad_extra_info;
    uint8 currYMB[256];     /* interleaved current macroblock in HTFM order */

#ifdef HTFM
    int nrmlz_th[48];       /* Threshold for fast SAD calculation using HTFM */
    HTFM_Stat htfm_stat;    /* For statistics collection */
#endif

    /* statistics */
    int numIntraMB;         /* keep track of number of intra MB */

    /* encoding complexity control */
    uint fullsearch_enable; /* flag to enable full-pel full-search */

    /* misc.*/
    bool outOfBandParamSet; /* flag to enable out-of-band param set */

    AVCSeqParamSet extSPS; /* for external SPS */
    AVCPicParamSet extPPS; /* for external PPS */

    /* time control */
    uint32  prevFrameNum;   /* previous frame number starting from modTimeRef */
    uint32  modTimeRef;     /* Reference modTime update every I-Vop*/
    uint32  wrapModTime;    /* Offset to modTime Ref, rarely used */

    uint    prevProcFrameNum;  /* previously processed frame number, could be skipped */
    uint    prevCodedFrameNum;  /* previously encoded frame number */
    /* POC related variables */
    uint32  dispOrdPOCRef;      /* reference POC in display order units. */

    /* Function pointers */
    AVCEncFuncPtr *functionPointer; /* store pointers to platform specific functions */

    /* Application control data */
    AVCHandle *avcHandle;


} AVCEncObject;
468 
469 
470 #endif /*AVCENC_INT_H_INCLUDED*/
471 
472