1 /*
2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #ifndef VP9_ENCODER_VP9_ENCODER_H_
12 #define VP9_ENCODER_VP9_ENCODER_H_
13
14 #include <stdio.h>
15
16 #include "./vpx_config.h"
17 #include "vpx_ports/mem.h"
18 #include "vpx/internal/vpx_codec_internal.h"
19 #include "vpx/vp8cx.h"
20
21 #include "vp9/common/vp9_ppflags.h"
22 #include "vp9/common/vp9_entropy.h"
23 #include "vp9/common/vp9_entropymode.h"
24 #include "vp9/common/vp9_onyxc_int.h"
25
26 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
27 #include "vp9/encoder/vp9_context_tree.h"
28 #include "vp9/encoder/vp9_encodemb.h"
29 #include "vp9/encoder/vp9_firstpass.h"
30 #include "vp9/encoder/vp9_lookahead.h"
31 #include "vp9/encoder/vp9_mbgraph.h"
32 #include "vp9/encoder/vp9_mcomp.h"
33 #include "vp9/encoder/vp9_quantize.h"
34 #include "vp9/encoder/vp9_ratectrl.h"
35 #include "vp9/encoder/vp9_rd.h"
36 #include "vp9/encoder/vp9_speed_features.h"
37 #include "vp9/encoder/vp9_svc_layercontext.h"
38 #include "vp9/encoder/vp9_tokenize.h"
39 #include "vp9/encoder/vp9_variance.h"
40 #if CONFIG_VP9_TEMPORAL_DENOISING
41 #include "vp9/encoder/vp9_denoiser.h"
42 #endif
43
44 #ifdef __cplusplus
45 extern "C" {
46 #endif
47
// Default GF interval. NOTE(review): presumably the golden-frame interval
// measured in frames -- the unit and the users are not visible in this header.
48 #define DEFAULT_GF_INTERVAL 10
49
// Bundle of coding state: MV cost tables, segment prediction probabilities,
// a pointer to a segment-map copy, loop-filter delta arrays and a full
// FRAME_CONTEXT. An instance is embedded in VP9_COMP as `coding_context`.
// NOTE(review): presumably used to save/restore state around trial encodes --
// confirm against the code that reads and writes it.
50 typedef struct {
// MV cost tables: joint costs, then two per-component tables. The _hp
// variant is presumably the high-precision MV table -- TODO confirm.
51 int nmvjointcost[MV_JOINTS];
52 int nmvcosts[2][MV_VALS];
53 int nmvcosts_hp[2][MV_VALS];
54
55 vp9_prob segment_pred_probs[PREDICTION_PROBS];
56
// Pointer only; who allocates and frees the buffer is not visible here.
57 unsigned char *last_frame_seg_map_copy;
58
59 // 0 = Intra, Last, GF, ARF
60 signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];
61 // 0 = ZERO_MV, MV
62 signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];
63
64 FRAME_CONTEXT fc;
65 } CODING_CONTEXT;
66
67
// Tri-state switch for the encode_breakout feature; stored in
// VP9_COMP::allow_encode_breakout.
68 typedef enum {
69 // encode_breakout is disabled.
70 ENCODE_BREAKOUT_DISABLED = 0,
71 // encode_breakout is enabled.
72 ENCODE_BREAKOUT_ENABLED = 1,
73 // encode_breakout is enabled with small max_thresh limit.
74 ENCODE_BREAKOUT_LIMITED = 2
75 } ENCODE_BREAKOUT_TYPE;
76
// Scaling mode selector, passed per axis to vp9_set_internal_size().
// NOTE(review): the names suggest ratios of 1/1, 4/5, 3/5 and 1/2, but the
// actual factors are defined elsewhere -- confirm before relying on them.
77 typedef enum {
78 NORMAL = 0,
79 FOURFIVE = 1,
80 THREEFIVE = 2,
81 ONETWO = 3
82 } VPX_SCALING;
83
// Encoder operating mode. Values start at 1; no enumerator is assigned 0.
// is_best_mode() treats ONE_PASS_BEST and TWO_PASS_SECOND_BEST as the
// "best quality" modes, and is_altref_enabled() excludes REALTIME.
84 typedef enum {
85 // Good Quality Fast Encoding. The encoder balances quality with the
86 // amount of time it takes to encode the output. (speed setting
87 // controls how fast)
88 ONE_PASS_GOOD = 1,
89
90 // One Pass - Best Quality. The encoder places priority on the
91 // quality of the output over encoding speed. The output is compressed
92 // at the highest possible quality. This option takes the longest
93 // amount of time to encode. (speed setting ignored)
94 ONE_PASS_BEST = 2,
95
96 // Two Pass - First Pass. The encoder generates a file of statistics
97 // for use in the second encoding pass. (speed setting controls how fast)
98 TWO_PASS_FIRST = 3,
99
100 // Two Pass - Second Pass. The encoder uses the statistics that were
101 // generated in the first encoding pass to create the compressed
102 // output. (speed setting controls how fast)
103 TWO_PASS_SECOND_GOOD = 4,
104
105 // Two Pass - Second Pass Best. The encoder uses the statistics that
106 // were generated in the first encoding pass to create the compressed
107 // output using the highest possible quality, and taking a
108 // longer amount of time to encode. (speed setting ignored)
109 TWO_PASS_SECOND_BEST = 5,
110
111 // Realtime/Live Encoding. This mode is optimized for realtime
112 // encoding (for example, capturing a television signal or feed from
113 // a live camera). (speed setting controls how fast)
114 REALTIME = 6,
115 } MODE;
116
// Single-bit flags (bits 0..2) that can be OR-ed together to describe a
// frame as key, golden and/or alt-ref.
117 typedef enum {
118 FRAMEFLAGS_KEY = 1 << 0,
119 FRAMEFLAGS_GOLDEN = 1 << 1,
120 FRAMEFLAGS_ALTREF = 1 << 2,
121 } FRAMETYPE_FLAGS;
122
// Adaptive quantization mode, stored in VP9EncoderConfig::aq_mode.
// AQ_MODE_COUNT is a sentinel equal to the number of real modes (4), so new
// modes must be inserted before it.
123 typedef enum {
124 NO_AQ = 0,
125 VARIANCE_AQ = 1,
126 COMPLEXITY_AQ = 2,
127 CYCLIC_REFRESH_AQ = 3,
128 AQ_MODE_COUNT // This should always be the last member of the enum
129 } AQ_MODE;
130
131
// Encoder configuration: stream geometry, speed/mode selection, key-frame
// policy, rate control, quantizer limits, scalability layout and tool
// switches. A copy is embedded in VP9_COMP as `oxcf`.
132 typedef struct VP9EncoderConfig {
133 BITSTREAM_PROFILE profile;
134 BIT_DEPTH bit_depth;
135 int width; // width of data passed to the compressor
136 int height; // height of data passed to the compressor
137 double framerate; // set to passed in framerate
138 int64_t target_bandwidth; // bandwidth to be used in kilobits per second
139
140 int noise_sensitivity; // pre processing blur: recommendation 0
141 int sharpness; // sharpening output: recommendation 0
142 int speed;
143 unsigned int rc_max_intra_bitrate_pct;
144
145 MODE mode;
146 int pass;
147
148 // Key Framing Operations
149 int auto_key; // autodetect cut scenes and set the keyframes
150 int key_freq; // maximum distance to key frame.
151
152 int lag_in_frames; // how many frames lag before we start encoding
153
154 // ----------------------------------------------------------------
155 // DATARATE CONTROL OPTIONS
156
157 // vbr, cbr, constrained quality or constant quality
158 enum vpx_rc_mode rc_mode;
159
160 // buffer targeting aggressiveness
161 int under_shoot_pct;
162 int over_shoot_pct;
163
164 // buffering parameters
165 int64_t starting_buffer_level_ms;
166 int64_t optimal_buffer_level_ms;
167 int64_t maximum_buffer_size_ms;
168
169 // Frame drop threshold.
170 int drop_frames_water_mark;
171
172 // controlling quality
173 int fixed_q;
// is_lossless_requested() reports true when both of the next two limits are 0.
174 int worst_allowed_q;
175 int best_allowed_q;
176 int cq_level;
177 AQ_MODE aq_mode; // Adaptive Quantization mode
178
179 // Internal frame size scaling.
180 int allow_spatial_resampling;
181 int scaled_frame_width;
182 int scaled_frame_height;
183
184 // Enable feature to reduce the frame quantization every x frames.
185 int frame_periodic_boost;
186
187 // two pass datarate control
188 int two_pass_vbrbias; // two pass datarate control tweaks
189 int two_pass_vbrmin_section;
190 int two_pass_vbrmax_section;
191 // END DATARATE CONTROL OPTIONS
192 // ----------------------------------------------------------------
193
194 // Spatial and temporal scalability.
195 int ss_number_layers; // Number of spatial layers.
196 int ts_number_layers; // Number of temporal layers.
197 // Bitrate allocation for spatial layers.
198 int ss_target_bitrate[VPX_SS_MAX_LAYERS];
// Per-spatial-layer alt-ref enable; read by is_altref_enabled() for the
// current spatial layer when spatial SVC is active.
199 int ss_play_alternate[VPX_SS_MAX_LAYERS];
200 // Bitrate allocation (CBR mode) and framerate factor, for temporal layers.
201 int ts_target_bitrate[VPX_TS_MAX_LAYERS];
202 int ts_rate_decimator[VPX_TS_MAX_LAYERS];
203
204 // These parameters aren't to be used in the final build -- don't use!
// NOTE(review): despite the warning above, is_altref_enabled() reads
// play_alternate.
205 int play_alternate;
206
207 int encode_breakout; // early breakout : for video conf recommend 800
208
209 /* Bitfield defining the error resiliency features to enable.
210 * Can provide decodable frames after losses in previous
211 * frames and decodable partitions after losses in the same frame.
212 */
213 unsigned int error_resilient_mode;
214
215 /* Bitfield defining the parallel decoding mode where the
216 * decoding in successive frames may be conducted in parallel
217 * just by decoding the frame headers.
218 */
219 unsigned int frame_parallel_decoding_mode;
220
221 int arnr_max_frames;
222 int arnr_strength;
223 int arnr_type;
224
225 int tile_columns;
226 int tile_rows;
227
228 struct vpx_fixed_buf two_pass_stats_in;
229 struct vpx_codec_pkt_list *output_pkt_list;
230
// Present only when the build enables CONFIG_FP_MB_STATS, so the struct
// layout depends on that build option.
231 #if CONFIG_FP_MB_STATS
232 struct vpx_fixed_buf firstpass_mb_stats_in;
233 #endif
234
235 vp8e_tuning tuning;
236 vp9e_tune_content content;
237 } VP9EncoderConfig;
238
is_lossless_requested(const VP9EncoderConfig * cfg)239 static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) {
240 return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0;
241 }
242
is_best_mode(MODE mode)243 static INLINE int is_best_mode(MODE mode) {
244 return mode == ONE_PASS_BEST || mode == TWO_PASS_SECOND_BEST;
245 }
246
// Top-level encoder instance. Aggregates the quantizer tables, the macroblock
// work area, the shared codec state (`common`), the active configuration
// (`oxcf`), lookahead/source buffers, reference-frame bookkeeping, rate
// control, speed features, two-pass and SVC state, mode cost tables and
// optional statistics. Several members are conditional on build options
// (CONFIG_FP_MB_STATS, CONFIG_INTERNAL_STATS, CONFIG_VP9_TEMPORAL_DENOISING),
// so the layout differs between builds.
247 typedef struct VP9_COMP {
248 QUANTS quants;
249 MACROBLOCK mb;
250 VP9_COMMON common;
251 VP9EncoderConfig oxcf;
252 struct lookahead_ctx *lookahead;
253 struct lookahead_entry *source;
254 struct lookahead_entry *alt_ref_source;
255 struct lookahead_entry *last_source;
256
257 YV12_BUFFER_CONFIG *Source;
258 YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames
259 YV12_BUFFER_CONFIG *un_scaled_source;
260 YV12_BUFFER_CONFIG scaled_source;
261 YV12_BUFFER_CONFIG *unscaled_last_source;
262 YV12_BUFFER_CONFIG scaled_last_source;
263
264 int gold_is_last; // gold same as last frame ( short circuit gold searches)
265 int alt_is_last; // Alt same as last ( short circuit altref search)
266 int gold_is_alt; // don't do both alt and gold search ( just do gold).
267
268 int skippable_frame;
269
270 int scaled_ref_idx[3];
// Indices returned by get_ref_frame_idx() for LAST_FRAME, GOLDEN_FRAME and
// every other reference value respectively; get_ref_frame_buffer() uses them
// to index common.ref_frame_map.
271 int lst_fb_idx;
272 int gld_fb_idx;
273 int alt_fb_idx;
274
275 int refresh_last_frame;
276 int refresh_golden_frame;
277 int refresh_alt_ref_frame;
278
279 int ext_refresh_frame_flags_pending;
280 int ext_refresh_last_frame;
281 int ext_refresh_golden_frame;
282 int ext_refresh_alt_ref_frame;
283
284 int ext_refresh_frame_context_pending;
285 int ext_refresh_frame_context;
286
287 YV12_BUFFER_CONFIG last_frame_uf;
288
289 TOKENEXTRA *tok;
290 unsigned int tok_count[4][1 << 6];
291
292 // Ambient reconstruction err target for force key frames
293 int ambient_err;
294
295 RD_OPT rd;
296
297 CODING_CONTEXT coding_context;
298
299 int zbin_mode_boost;
300 int zbin_mode_boost_enabled;
301 int active_arnr_frames; // <= cpi->oxcf.arnr_max_frames
302 int active_arnr_strength; // <= cpi->oxcf.arnr_strength
303
304 int64_t last_time_stamp_seen;
305 int64_t last_end_time_stamp_seen;
306 int64_t first_time_stamp_ever;
307
308 RATE_CONTROL rc;
309
310 vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES];
311
312 struct vpx_codec_pkt_list *output_pkt_list;
313
314 MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS];
315 int mbgraph_n_frames; // number of frames filled in the above
316 int static_mb_pct; // % forced skip mbs by segmentation
317 int ref_frame_flags;
318
319 SPEED_FEATURES sf;
320
321 unsigned int max_mv_magnitude;
322 int mv_step_param;
323
324 // Default value is 1. From first pass stats, encode_breakout may be disabled.
325 ENCODE_BREAKOUT_TYPE allow_encode_breakout;
326
327 // Get threshold from external input. A suggested threshold is 800 for HD
328 // clips, and 300 for < HD clips.
329 int encode_breakout;
330
331 unsigned char *segmentation_map;
332
333 // segment threshold for encode breakout
334 int segment_encode_breakout[MAX_SEGMENTS];
335
336 unsigned char *complexity_map;
337
338 CYCLIC_REFRESH *cyclic_refresh;
339
// Search and variance function pointers; where they are assigned is not
// visible in this header.
340 fractional_mv_step_fp *find_fractional_mv_step;
341 vp9_full_search_fn_t full_search_sad;
342 vp9_refining_search_fn_t refining_search_sad;
343 vp9_diamond_search_fn_t diamond_search_sad;
344 vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES];
// Timing counters; units and update sites are not visible in this header.
345 uint64_t time_receive_data;
346 uint64_t time_compress_data;
347 uint64_t time_pick_lpf;
348 uint64_t time_encode_sb_row;
349
350 #if CONFIG_FP_MB_STATS
351 int use_fp_mb_stats;
352 #endif
353
354 TWO_PASS twopass;
355
356 YV12_BUFFER_CONFIG alt_ref_buffer;
357 YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
358
// Statistics members below exist only in CONFIG_INTERNAL_STATS builds.
359 #if CONFIG_INTERNAL_STATS
360 unsigned int mode_chosen_counts[MAX_MODES];
361
362 int count;
363 double total_y;
364 double total_u;
365 double total_v;
366 double total;
367 uint64_t total_sq_error;
368 uint64_t total_samples;
369
370 double totalp_y;
371 double totalp_u;
372 double totalp_v;
373 double totalp;
374 uint64_t totalp_sq_error;
375 uint64_t totalp_samples;
376
377 int bytes;
378 double summed_quality;
379 double summed_weights;
380 double summedp_quality;
381 double summedp_weights;
382 unsigned int tot_recode_hits;
383
384
385 double total_ssimg_y;
386 double total_ssimg_u;
387 double total_ssimg_v;
388 double total_ssimg_all;
389
390 int b_calculate_ssimg;
391 #endif
392 int b_calculate_psnr;
393
394 int droppable;
395
396 int dummy_packing; /* flag to indicate if packing is dummy */
397
398 unsigned int tx_stepdown_count[TX_SIZES];
399
400 int initial_width;
401 int initial_height;
402
// use_svc and the layer counts in `svc` are what is_spatial_svc() tests.
403 int use_svc;
404
405 SVC svc;
406
407 // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type.
408 diff *source_diff_var;
409 // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
410 unsigned int source_var_thresh;
411 int frames_till_next_var_check;
412
413 int frame_flags;
414
415 search_site_config ss_cfg;
416
// Mode cost tables. Note that inter_mode_cost is declared `unsigned` while
// the sibling tables are `int`.
417 int mbmode_cost[INTRA_MODES];
418 unsigned inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES];
419 int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES];
420 int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES];
421 int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS];
422
423 PICK_MODE_CONTEXT *leaf_tree;
424 PC_TREE *pc_tree;
425 PC_TREE *pc_root;
426 int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES];
427
428 int multi_arf_allowed;
429 int multi_arf_enabled;
430 int multi_arf_last_grp_enabled;
431
432 #if CONFIG_VP9_TEMPORAL_DENOISING
433 VP9_DENOISER denoiser;
434 #endif
435 } VP9_COMP;
436
// Encoder initialization entry point (per the name); the definition lives
// outside this header. Declared with an explicit (void) parameter list so
// that C compilers see a real prototype and reject calls that pass
// arguments; an empty list "()" leaves the parameters unspecified in C.
void vp9_initialize_enc(void);
438
// Encoder entry points. Only declarations appear in this header, so the
// notes below are limited to what the signatures and existing comments show.

// Returns a pointer to an encoder instance built from `oxcf`.
// NOTE(review): failure behaviour is not visible here -- confirm in the
// definition. vp9_remove_compressor() is presumably the matching teardown.
439 struct VP9_COMP *vp9_create_compressor(VP9EncoderConfig *oxcf);
440 void vp9_remove_compressor(VP9_COMP *cpi);
441
442 void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf);
443
444 // receive a frames worth of data. caller can assume that a copy of this
445 // frame is made and not just a copy of the pointer..
446 int vp9_receive_raw_frame(VP9_COMP *cpi, unsigned int frame_flags,
447 YV12_BUFFER_CONFIG *sd, int64_t time_stamp,
448 int64_t end_time_stamp);
449
// Output parameters: *frame_flags, *size, *time_stamp and *time_end are
// non-const pointers and `dest` is a writable byte buffer. The meaning of the
// int return value is not visible in this header.
450 int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
451 size_t *size, uint8_t *dest,
452 int64_t *time_stamp, int64_t *time_end, int flush);
453
454 int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest,
455 vp9_ppflags_t *flags);
456
457 int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags);
458
459 void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags);
460
461 int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
462 YV12_BUFFER_CONFIG *sd);
463
464 int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag,
465 YV12_BUFFER_CONFIG *sd);
466
467 int vp9_update_entropy(VP9_COMP *cpi, int update);
468
469 int vp9_set_active_map(VP9_COMP *cpi, unsigned char *map, int rows, int cols);
470
// Takes one VPX_SCALING mode per axis (horizontal, then vertical).
471 int vp9_set_internal_size(VP9_COMP *cpi,
472 VPX_SCALING horiz_mode, VPX_SCALING vert_mode);
473
474 int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width,
475 unsigned int height);
476
477 void vp9_set_svc(VP9_COMP *cpi, int use_svc);
478
479 int vp9_get_quantizer(struct VP9_COMP *cpi);
480
get_ref_frame_idx(const VP9_COMP * cpi,MV_REFERENCE_FRAME ref_frame)481 static INLINE int get_ref_frame_idx(const VP9_COMP *cpi,
482 MV_REFERENCE_FRAME ref_frame) {
483 if (ref_frame == LAST_FRAME) {
484 return cpi->lst_fb_idx;
485 } else if (ref_frame == GOLDEN_FRAME) {
486 return cpi->gld_fb_idx;
487 } else {
488 return cpi->alt_fb_idx;
489 }
490 }
491
get_ref_frame_buffer(VP9_COMP * cpi,MV_REFERENCE_FRAME ref_frame)492 static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer(
493 VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) {
494 VP9_COMMON * const cm = &cpi->common;
495 return &cm->frame_bufs[cm->ref_frame_map[get_ref_frame_idx(cpi, ref_frame)]]
496 .buf;
497 }
498
499 // Intra only frames, golden frames (except alt ref overlays) and
500 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const VP9_COMP * cpi)501 static INLINE int frame_is_boosted(const VP9_COMP *cpi) {
502 return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame ||
503 (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref) ||
504 vp9_is_upper_layer_key_frame(cpi);
505 }
506
// Returns the number of token slots to allocate for a frame of
// mb_rows x mb_cols macroblocks.
static INLINE int get_token_alloc(int mb_rows, int mb_cols) {
  // TODO(JBB): double check we can't exceed this token count if we have a
  // 32x32 transform crossing a boundary at a multiple of 16.
  // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full
  // resolution. We assume up to 1 token per pixel, and then allow
  // a head room of 4.
  const int tokens_per_mb = 16 * 16 * 3 + 4;
  return mb_rows * mb_cols * tokens_per_mb;
}
515
// Further encoder helpers. Only declarations appear in this header; the
// definitions live elsewhere.

// Takes two read-only frame buffers and returns an int. NOTE(review): the
// name suggests a luma sum-of-squared-error between them -- confirm in the
// definition.
516 int vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b);
517
518 void vp9_alloc_compressor_data(VP9_COMP *cpi);
519
520 void vp9_scale_references(VP9_COMP *cpi);
521
522 void vp9_update_reference_frames(VP9_COMP *cpi);
523
// Note the mixed widths: `val` and `num` are 64-bit while `denom` is int.
524 int64_t vp9_rescale(int64_t val, int64_t num, int denom);
525
526 void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv);
527
// Returns a frame-buffer pointer. NOTE(review): presumably either `unscaled`
// or `scaled` depending on whether scaling was needed -- confirm in the
// definition.
528 YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm,
529 YV12_BUFFER_CONFIG *unscaled,
530 YV12_BUFFER_CONFIG *scaled);
531
532 void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags);
533
is_spatial_svc(const struct VP9_COMP * const cpi)534 static INLINE int is_spatial_svc(const struct VP9_COMP *const cpi) {
535 return cpi->use_svc &&
536 cpi->svc.number_temporal_layers == 1 &&
537 cpi->svc.number_spatial_layers > 1;
538 }
539
is_altref_enabled(const VP9_COMP * const cpi)540 static INLINE int is_altref_enabled(const VP9_COMP *const cpi) {
541 return cpi->oxcf.mode != REALTIME && cpi->oxcf.lag_in_frames > 0 &&
542 (cpi->oxcf.play_alternate &&
543 (!is_spatial_svc(cpi) ||
544 cpi->oxcf.ss_play_alternate[cpi->svc.spatial_layer_id]));
545 }
546
set_ref_ptrs(VP9_COMMON * cm,MACROBLOCKD * xd,MV_REFERENCE_FRAME ref0,MV_REFERENCE_FRAME ref1)547 static INLINE void set_ref_ptrs(VP9_COMMON *cm, MACROBLOCKD *xd,
548 MV_REFERENCE_FRAME ref0,
549 MV_REFERENCE_FRAME ref1) {
550 xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME
551 : 0];
552 xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME
553 : 0];
554 }
555
// Returns the lowest bit of frame_index (0 or 1), so the result alternates
// between consecutive frame indices.
static INLINE int get_chessboard_index(const int frame_index) {
  const int low_bit = frame_index & 0x1;
  return low_bit;
}
559
560 #ifdef __cplusplus
561 } // extern "C"
562 #endif
563
564 #endif // VP9_ENCODER_VP9_ENCODER_H_
565