/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10 
11 #ifndef VP9_ENCODER_VP9_ENCODER_H_
12 #define VP9_ENCODER_VP9_ENCODER_H_
13 
14 #include <stdio.h>
15 
16 #include "./vpx_config.h"
17 #include "vpx_ports/mem.h"
18 #include "vpx/internal/vpx_codec_internal.h"
19 #include "vpx/vp8cx.h"
20 
21 #include "vp9/common/vp9_ppflags.h"
22 #include "vp9/common/vp9_entropy.h"
23 #include "vp9/common/vp9_entropymode.h"
24 #include "vp9/common/vp9_onyxc_int.h"
25 
26 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
27 #include "vp9/encoder/vp9_context_tree.h"
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_firstpass.h"
30 #include "vp9/encoder/vp9_lookahead.h"
31 #include "vp9/encoder/vp9_mbgraph.h"
32 #include "vp9/encoder/vp9_mcomp.h"
33 #include "vp9/encoder/vp9_quantize.h"
34 #include "vp9/encoder/vp9_ratectrl.h"
35 #include "vp9/encoder/vp9_rd.h"
36 #include "vp9/encoder/vp9_speed_features.h"
37 #include "vp9/encoder/vp9_svc_layercontext.h"
38 #include "vp9/encoder/vp9_tokenize.h"
39 #include "vp9/encoder/vp9_variance.h"
40 #if CONFIG_VP9_TEMPORAL_DENOISING
41 #include "vp9/encoder/vp9_denoiser.h"
42 #endif
43 
44 #ifdef __cplusplus
45 extern "C" {
46 #endif
47 
// Default GF interval.
// NOTE(review): "GF" presumably stands for golden frame and the unit is
// presumably frames -- confirm against the code that consumes this value.
#define DEFAULT_GF_INTERVAL         10
49 
50 typedef struct {
51   int nmvjointcost[MV_JOINTS];
52   int nmvcosts[2][MV_VALS];
53   int nmvcosts_hp[2][MV_VALS];
54 
55   vp9_prob segment_pred_probs[PREDICTION_PROBS];
56 
57   unsigned char *last_frame_seg_map_copy;
58 
59   // 0 = Intra, Last, GF, ARF
60   signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
61   // 0 = ZERO_MV, MV
62   signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
63 
64   FRAME_CONTEXT fc;
65 } CODING_CONTEXT;
66 
67 
// State of the encode_breakout feature.
typedef enum {
  // encode_breakout is disabled.
  ENCODE_BREAKOUT_DISABLED = 0,
  // encode_breakout is enabled.
  ENCODE_BREAKOUT_ENABLED = 1,
  // encode_breakout is enabled with small max_thresh limit.
  ENCODE_BREAKOUT_LIMITED = 2
} ENCODE_BREAKOUT_TYPE;
76 
// Scaling mode selector.
// NOTE(review): the names suggest ratios of 1/1, 4/5, 3/5 and 1/2 --
// confirm against the code that maps these modes to scale factors.
typedef enum {
  NORMAL      = 0,
  FOURFIVE    = 1,
  THREEFIVE   = 2,
  ONETWO      = 3
} VPX_SCALING;
83 
// Encoding mode (pass structure and quality/speed trade-off).
typedef enum {
  // Good Quality Fast Encoding. The encoder balances quality with the
  // amount of time it takes to encode the output. (speed setting
  // controls how fast)
  ONE_PASS_GOOD = 1,

  // One Pass - Best Quality. The encoder places priority on the
  // quality of the output over encoding speed. The output is compressed
  // at the highest possible quality. This option takes the longest
  // amount of time to encode. (speed setting ignored)
  ONE_PASS_BEST = 2,

  // Two Pass - First Pass. The encoder generates a file of statistics
  // for use in the second encoding pass. (speed setting controls how fast)
  TWO_PASS_FIRST = 3,

  // Two Pass - Second Pass. The encoder uses the statistics that were
  // generated in the first encoding pass to create the compressed
  // output. (speed setting controls how fast)
  TWO_PASS_SECOND_GOOD = 4,

  // Two Pass - Second Pass Best.  The encoder uses the statistics that
  // were generated in the first encoding pass to create the compressed
  // output using the highest possible quality, and taking a
  // longer amount of time to encode. (speed setting ignored)
  TWO_PASS_SECOND_BEST = 5,

  // Realtime/Live Encoding. This mode is optimized for realtime
  // encoding (for example, capturing a television signal or feed from
  // a live camera). (speed setting controls how fast)
  REALTIME = 6
} MODE;
116 
// Frame-type bit flags. Each enumerator occupies its own bit, so values can
// be OR-ed together and tested with a mask.
typedef enum {
  FRAMEFLAGS_KEY    = 1 << 0,
  FRAMEFLAGS_GOLDEN = 1 << 1,
  FRAMEFLAGS_ALTREF = 1 << 2
} FRAMETYPE_FLAGS;
122 
// Adaptive quantization mode. AQ_MODE_COUNT is a sentinel equal to the
// number of real modes, so new modes must be inserted before it.
typedef enum {
  NO_AQ = 0,
  VARIANCE_AQ = 1,
  COMPLEXITY_AQ = 2,
  CYCLIC_REFRESH_AQ = 3,
  AQ_MODE_COUNT  // This should always be the last member of the enum
} AQ_MODE;
130 
131 
132 typedef struct VP9EncoderConfig {
133   BITSTREAM_PROFILE profile;
134   BIT_DEPTH bit_depth;
135   int width;  // width of data passed to the compressor
136   int height;  // height of data passed to the compressor
137   double framerate;  // set to passed in framerate
138   int64_t target_bandwidth;  // bandwidth to be used in kilobits per second
139 
140   int noise_sensitivity;  // pre processing blur: recommendation 0
141   int sharpness;  // sharpening output: recommendation 0:
142   int speed;
143   unsigned int rc_max_intra_bitrate_pct;
144 
145   MODE mode;
146   int pass;
147 
148   // Key Framing Operations
149   int auto_key;  // autodetect cut scenes and set the keyframes
150   int key_freq;  // maximum distance to key frame.
151 
152   int lag_in_frames;  // how many frames lag before we start encoding
153 
154   // ----------------------------------------------------------------
155   // DATARATE CONTROL OPTIONS
156 
157   // vbr, cbr, constrained quality or constant quality
158   enum vpx_rc_mode rc_mode;
159 
160   // buffer targeting aggressiveness
161   int under_shoot_pct;
162   int over_shoot_pct;
163 
164   // buffering parameters
165   int64_t starting_buffer_level_ms;
166   int64_t optimal_buffer_level_ms;
167   int64_t maximum_buffer_size_ms;
168 
169   // Frame drop threshold.
170   int drop_frames_water_mark;
171 
172   // controlling quality
173   int fixed_q;
174   int worst_allowed_q;
175   int best_allowed_q;
176   int cq_level;
177   AQ_MODE aq_mode;  // Adaptive Quantization mode
178 
179   // Internal frame size scaling.
180   int allow_spatial_resampling;
181   int scaled_frame_width;
182   int scaled_frame_height;
183 
184   // Enable feature to reduce the frame quantization every x frames.
185   int frame_periodic_boost;
186 
187   // two pass datarate control
188   int two_pass_vbrbias;        // two pass datarate control tweaks
189   int two_pass_vbrmin_section;
190   int two_pass_vbrmax_section;
191   // END DATARATE CONTROL OPTIONS
192   // ----------------------------------------------------------------
193 
194   // Spatial and temporal scalability.
195   int ss_number_layers;  // Number of spatial layers.
196   int ts_number_layers;  // Number of temporal layers.
197   // Bitrate allocation for spatial layers.
198   int ss_target_bitrate[VPX_SS_MAX_LAYERS];
199   int ss_play_alternate[VPX_SS_MAX_LAYERS];
200   // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
201   int ts_target_bitrate[VPX_TS_MAX_LAYERS];
202   int ts_rate_decimator[VPX_TS_MAX_LAYERS];
203 
204   // these parameters aren't to be used in final build don't use!!!
205   int play_alternate;
206 
207   int encode_breakout;  // early breakout : for video conf recommend 800
208 
209   /* Bitfield defining the error resiliency features to enable.
210    * Can provide decodable frames after losses in previous
211    * frames and decodable partitions after losses in the same frame.
212    */
213   unsigned int error_resilient_mode;
214 
215   /* Bitfield defining the parallel decoding mode where the
216    * decoding in successive frames may be conducted in parallel
217    * just by decoding the frame headers.
218    */
219   unsigned int frame_parallel_decoding_mode;
220 
221   int arnr_max_frames;
222   int arnr_strength;
223   int arnr_type;
224 
225   int tile_columns;
226   int tile_rows;
227 
228   struct vpx_fixed_buf         two_pass_stats_in;
229   struct vpx_codec_pkt_list  *output_pkt_list;
230 
231 #if CONFIG_FP_MB_STATS
232   struct vpx_fixed_buf         firstpass_mb_stats_in;
233 #endif
234 
235   vp8e_tuning tuning;
236   vp9e_tune_content content;
237 } VP9EncoderConfig;
238 
is_lossless_requested(const VP9EncoderConfig * cfg)239 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
240   return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
241 }
242 
is_best_mode(MODE mode)243 static INLINE int is_best_mode(MODE mode) {
244   return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
245 }
246 
247 typedef struct VP9_COMP {
248   QUANTS quants;
249   MACROBLOCK mb;
250   VP9_COMMON common;
251   VP9EncoderConfig oxcf;
252   struct lookahead_ctx    *lookahead;
253   struct lookahead_entry  *source;
254   struct lookahead_entry  *alt_ref_source;
255   struct lookahead_entry  *last_source;
256 
257   YV12_BUFFER_CONFIG *Source;
258   YV12_BUFFER_CONFIG *Last_Source;  // NULL for first frame and alt_ref frames
259   YV12_BUFFER_CONFIG *un_scaled_source;
260   YV12_BUFFER_CONFIG scaled_source;
261   YV12_BUFFER_CONFIG *unscaled_last_source;
262   YV12_BUFFER_CONFIG scaled_last_source;
263 
264   int gold_is_last;  // gold same as last frame ( short circuit gold searches)
265   int alt_is_last;  // Alt same as last ( short circuit altref search)
266   int gold_is_alt;  // don't do both alt and gold search ( just do gold).
267 
268   int skippable_frame;
269 
270   int scaled_ref_idx[3];
271   int lst_fb_idx;
272   int gld_fb_idx;
273   int alt_fb_idx;
274 
275   int refresh_last_frame;
276   int refresh_golden_frame;
277   int refresh_alt_ref_frame;
278 
279   int ext_refresh_frame_flags_pending;
280   int ext_refresh_last_frame;
281   int ext_refresh_golden_frame;
282   int ext_refresh_alt_ref_frame;
283 
284   int ext_refresh_frame_context_pending;
285   int ext_refresh_frame_context;
286 
287   YV12_BUFFER_CONFIG last_frame_uf;
288 
289   TOKENEXTRA *tok;
290   unsigned int tok_count[4][1 << 6];
291 
292   // Ambient reconstruction err target for force key frames
293   int ambient_err;
294 
295   RD_OPT rd;
296 
297   CODING_CONTEXT coding_context;
298 
299   int zbin_mode_boost;
300   int zbin_mode_boost_enabled;
301   int active_arnr_frames;           // <= cpi->oxcf.arnr_max_frames
302   int active_arnr_strength;         // <= cpi->oxcf.arnr_max_strength
303 
304   int64_t last_time_stamp_seen;
305   int64_t last_end_time_stamp_seen;
306   int64_t first_time_stamp_ever;
307 
308   RATE_CONTROL rc;
309 
310   vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
311 
312   struct vpx_codec_pkt_list  *output_pkt_list;
313 
314   MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
315   int mbgraph_n_frames;             // number of frames filled in the above
316   int static_mb_pct;                // % forced skip mbs by segmentation
317   int ref_frame_flags;
318 
319   SPEED_FEATURES sf;
320 
321   unsigned int max_mv_magnitude;
322   int mv_step_param;
323 
324   // Default value is 1. From first pass stats, encode_breakout may be disabled.
325   ENCODE_BREAKOUT_TYPE allow_encode_breakout;
326 
327   // Get threshold from external input. A suggested threshold is 800 for HD
328   // clips, and 300 for < HD clips.
329   int encode_breakout;
330 
331   unsigned char *segmentation_map;
332 
333   // segment threashold for encode breakout
334   int  segment_encode_breakout[MAX_SEGMENTS];
335 
336   unsigned char *complexity_map;
337 
338   CYCLIC_REFRESH *cyclic_refresh;
339 
340   fractional_mv_step_fp *find_fractional_mv_step;
341   vp9_full_search_fn_t full_search_sad;
342   vp9_refining_search_fn_t refining_search_sad;
343   vp9_diamond_search_fn_t diamond_search_sad;
344   vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
345   uint64_t time_receive_data;
346   uint64_t time_compress_data;
347   uint64_t time_pick_lpf;
348   uint64_t time_encode_sb_row;
349 
350 #if CONFIG_FP_MB_STATS
351   int use_fp_mb_stats;
352 #endif
353 
354   TWO_PASS twopass;
355 
356   YV12_BUFFER_CONFIG alt_ref_buffer;
357   YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
358 
359 #if CONFIG_INTERNAL_STATS
360   unsigned int mode_chosen_counts[MAX_MODES];
361 
362   int    count;
363   double total_y;
364   double total_u;
365   double total_v;
366   double total;
367   uint64_t total_sq_error;
368   uint64_t total_samples;
369 
370   double totalp_y;
371   double totalp_u;
372   double totalp_v;
373   double totalp;
374   uint64_t totalp_sq_error;
375   uint64_t totalp_samples;
376 
377   int    bytes;
378   double summed_quality;
379   double summed_weights;
380   double summedp_quality;
381   double summedp_weights;
382   unsigned int tot_recode_hits;
383 
384 
385   double total_ssimg_y;
386   double total_ssimg_u;
387   double total_ssimg_v;
388   double total_ssimg_all;
389 
390   int b_calculate_ssimg;
391 #endif
392   int b_calculate_psnr;
393 
394   int droppable;
395 
396   int dummy_packing;    /* flag to indicate if packing is dummy */
397 
398   unsigned int tx_stepdown_count[TX_SIZES];
399 
400   int initial_width;
401   int initial_height;
402 
403   int use_svc;
404 
405   SVC svc;
406 
407   // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
408   diff *source_diff_var;
409   // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
410   unsigned int source_var_thresh;
411   int frames_till_next_var_check;
412 
413   int frame_flags;
414 
415   search_site_config ss_cfg;
416 
417   int mbmode_cost[INTRA_MODES];
418   unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
419   int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
420   int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
421   int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
422 
423   PICK_MODE_CONTEXT *leaf_tree;
424   PC_TREE *pc_tree;
425   PC_TREE *pc_root;
426   int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
427 
428   int multi_arf_allowed;
429   int multi_arf_enabled;
430   int multi_arf_last_grp_enabled;
431 
432 #if CONFIG_VP9_TEMPORAL_DENOISING
433   VP9_DENOISER denoiser;
434 #endif
435 } VP9_COMP;
436 
// Encoder initialization entry point; takes no arguments. Declared with an
// explicit (void) so that C compilers treat it as a prototype and reject
// calls that pass arguments.
void vp9_initialize_enc(void);
438 
439 struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf);
440 void vp9_remove_compressor(VP9_COMP *cpi);
441 
442 void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);
443 
444   // receive a frames worth of data. caller can assume that a copy of this
445   // frame is made and not just a copy of the pointer..
446 int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
447                           YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
448                           int64_t end_time_stamp);
449 
450 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
451                             size_t *size, uint8_t *dest,
452                             int64_t *time_stamp, int64_t *time_end, int flush);
453 
454 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
455                               vp9_ppflags_t *flags);
456 
457 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags);
458 
459 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
460 
461 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
462                            YV12_BUFFER_CONFIG *sd);
463 
464 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
465                           YV12_BUFFER_CONFIG *sd);
466 
467 int vp9_update_entropy(VP9_COMP *cpi, int update);
468 
469 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
470 
471 int vp9_set_internal_size(VP9_COMP *cpi,
472                           VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
473 
474 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
475                          unsigned int height);
476 
477 void vp9_set_svc(VP9_COMP *cpi, int use_svc);
478 
479 int vp9_get_quantizer(struct VP9_COMP *cpi);
480 
get_ref_frame_idx(const VP9_COMP * cpi,MV_REFERENCE_FRAME ref_frame)481 static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
482                                     MV_REFERENCE_FRAME ref_frame) {
483   if (ref_frame == LAST_FRAME) {
484     return cpi->lst_fb_idx;
485   } else if (ref_frame == GOLDEN_FRAME) {
486     return cpi->gld_fb_idx;
487   } else {
488     return cpi->alt_fb_idx;
489   }
490 }
491 
get_ref_frame_buffer(VP9_COMP * cpi,MV_REFERENCE_FRAME ref_frame)492 static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
493     VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
494   VP9_COMMON * const cm = &cpi->common;
495   return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]
496       .buf;
497 }
498 
499 // Intra only frames, golden frames (except alt ref overlays) and
500 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const VP9_COMP * cpi)501 static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
502   return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
503          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
504          vp9_is_upper_layer_key_frame(cpi);
505 }
506 
// Returns the number of tokens to allocate for a frame of mb_rows x mb_cols
// macroblocks: (16 * 16 * 3 + 4) tokens per macroblock.
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
  // TODO(JBB): double check we can't exceed this token count if we have a
  // 32x32 transform crossing a boundary at a multiple of 16.
  // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
  // resolution. We assume up to 1 token per pixel, and then allow
  // a head room of 4.
  return mb_rows * mb_cols * (16 * 16 * 3 + 4);
}
515 
516 int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
517 
518 void vp9_alloc_compressor_data(VP9_COMP *cpi);
519 
520 void vp9_scale_references(VP9_COMP *cpi);
521 
522 void vp9_update_reference_frames(VP9_COMP *cpi);
523 
524 int64_t vp9_rescale(int64_t val, int64_t num, int denom);
525 
526 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
527 
528 YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
529                                           YV12_BUFFER_CONFIG *unscaled,
530                                           YV12_BUFFER_CONFIG *scaled);
531 
532 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
533 
is_spatial_svc(const struct VP9_COMP * const cpi)534 static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) {
535   return cpi->use_svc &&
536          cpi->svc.number_temporal_layers == 1 &&
537          cpi->svc.number_spatial_layers > 1;
538 }
539 
is_altref_enabled(const VP9_COMP * const cpi)540 static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
541   return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
542          (cpi->oxcf.play_alternate &&
543           (!is_spatial_svc(cpi) ||
544            cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
545 }
546 
set_ref_ptrs(VP9_COMMON * cm,MACROBLOCKD * xd,MV_REFERENCE_FRAME ref0,MV_REFERENCE_FRAME ref1)547 static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
548                                 MV_REFERENCE_FRAME ref0,
549                                 MV_REFERENCE_FRAME ref1) {
550   xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
551                                                          : 0];
552   xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
553                                                          : 0];
554 }
555 
// Returns the lowest bit of frame_index (0 or 1), so the result alternates
// between consecutive frame indices.
static INLINE int get_chessboard_index(const int frame_index) {
  return frame_index & 0x1;
}
559 
560 #ifdef __cplusplus
561 }  // extern "C"
562 #endif
563 
564 #endif  // VP9_ENCODER_VP9_ENCODER_H_
565