1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 
14 #include "av1/encoder/encoder.h"
15 #include "av1/encoder/speed_features.h"
16 #include "av1/encoder/rdopt.h"
17 
18 #include "aom_dsp/aom_dsp_common.h"
19 
// Highest speed index covered by the mesh tables in this file; each table
// below therefore has MAX_MESH_SPEED + 1 rows/entries.
#define MAX_MESH_SPEED 5  // Max speed setting for mesh motion method
// Max speed setting for tx domain evaluation
#define MAX_TX_DOMAIN_EVAL_SPEED 5
// Mesh search patterns for good-quality encoding: one row per speed level
// (0..MAX_MESH_SPEED), each row holding MAX_MESH_STEP entries.
// NOTE(review): each pair is presumably { range, interval } -- confirm against
// the MESH_PATTERN definition.
static MESH_PATTERN
    good_quality_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } },
      { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
      { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } },
    };
// One entry per speed level (0..MAX_MESH_SPEED); values decrease as speed
// increases.
// NOTE(review): presumably a percentage cap on how often the mesh search may
// run -- confirm at the use site.
static unsigned char good_quality_max_mesh_pct[MAX_MESH_SPEED + 1] = {
  50, 50, 25, 15, 5, 1
};
35 
// Mesh search patterns for intra block copy: one row per speed level
// (0..MAX_MESH_SPEED), each row holding MAX_MESH_STEP entries. Only the first
// two entries of every row are non-zero.
// NOTE(review): each pair is presumably { range, interval } -- confirm against
// the MESH_PATTERN definition.
// TODO(huisu@google.com): These settings are pretty relaxed, tune them for
// each speed setting
static MESH_PATTERN intrabc_mesh_patterns[MAX_MESH_SPEED + 1][MAX_MESH_STEP] = {
  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
  { { 256, 1 }, { 256, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 1 }, { 64, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
  { { 64, 4 }, { 16, 1 }, { 0, 0 }, { 0, 0 } },
};
// One entry per speed level (0..MAX_MESH_SPEED); the intra block copy
// counterpart of good_quality_max_mesh_pct.
static uint8_t intrabc_max_mesh_pct[MAX_MESH_SPEED + 1] = { 100, 100, 100,
                                                            25,  25,  10 };
48 
// Threshold values used for pruning the transform-domain distortion
// evaluation based on block MSE. The table has MAX_TX_DOMAIN_EVAL_SPEED + 1
// entries.
// NOTE(review): presumably indexed by sf->use_transform_domain_distortion,
// which the functions in this file set to 0, 1 or 2 -- confirm at the use
// site.
// TODO(any): Experiment with the threshold logic based on a variance metric
static unsigned int tx_domain_dist_thresholds[MAX_TX_DOMAIN_EVAL_SPEED + 1] = {
  UINT_MAX, 162754, 22026, 22026, 22026, 0
};
// Threshold values used for disabling coefficient RD-optimization based on
// block MSE. The size is a literal 5 rather than a named constant.
// NOTE(review): presumably indexed by sf->perform_coeff_opt, which the
// good-quality settings in this file set to values in 0..4 -- confirm at the
// use site.
// TODO(any): Experiment with the threshold logic based on a variance metric
static unsigned int coeff_opt_dist_thresholds[5] = { UINT_MAX, 162754, 162754,
                                                     22026, 22026 };
// Scaling values used for gating wedge/compound-segment search based on the
// best approximate RD cost. The two tables have three entries each.
// NOTE(review): entries presumably pair up as mul[i] / div[i] (1/3, 11/16,
// 12/16) -- confirm at the use site.
static int comp_type_rd_threshold_mul[3] = { 1, 11, 12 };
static int comp_type_rd_threshold_div[3] = { 3, 16, 16 };
64 
65 // Intra only frames, golden frames (except alt ref overlays) and
66 // alt ref frames tend to be coded at a higher than ambient quality
frame_is_boosted(const AV1_COMP * cpi)67 static int frame_is_boosted(const AV1_COMP *cpi) {
68   return frame_is_kf_gf_arf(cpi);
69 }
70 
71 // Sets a partition size down to which the auto partition code will always
72 // search (can go lower), based on the image dimensions. The logic here
73 // is that the extent to which ringing artefacts are offensive, depends
74 // partly on the screen area that over which they propogate. Propogation is
75 // limited by transform block size but the screen area take up by a given block
76 // size will be larger for a small image format stretched to full screen.
set_partition_min_limit(const AV1_COMMON * const cm)77 static BLOCK_SIZE set_partition_min_limit(const AV1_COMMON *const cm) {
78   unsigned int screen_area = (cm->width * cm->height);
79 
80   // Select block size based on image format size.
81   if (screen_area < 1280 * 720) {
82     // Formats smaller in area than 720P
83     return BLOCK_4X4;
84   } else if (screen_area < 1920 * 1080) {
85     // Format >= 720P and < 1080P
86     return BLOCK_8X8;
87   } else {
88     // Formats 1080P and up
89     return BLOCK_16X16;
90   }
91 }
92 
// Applies the frame-size-dependent speed features for good-quality encoding.
//
//   cpi:   encoder instance; only cpi->common (width/height) is read.
//   sf:    speed-feature set to update.
//   speed: requested speed level.
//
// Frames are classified by their shorter side (>= 480 and >= 720). Settings
// are cumulative: each speed level applies everything from the lower levels
// and then overrides selected fields, so the function simply returns once the
// requested level has been handled.
static void set_good_speed_feature_framesize_dependent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  // ML partition-search breakout thresholds, in the order BLOCK_8X8,
  // BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, BLOCK_128X128.
  static const int base_breakout[5] = { 200, 250, 300, 500, -1 };
  static const int speed1_breakout[5] = { 200, 250, 300, 300, -1 };
  const AV1_COMMON *const cm = &cpi->common;
  const int shorter_side = AOMMIN(cm->width, cm->height);
  const int hd_or_larger = shorter_side >= 720;
  const int sd_or_larger = shorter_side >= 480;
  int i;

  // Settings applied at every speed level.
  if (!sd_or_larger) {
    sf->use_square_partition_only_threshold = BLOCK_64X64;
    sf->auto_max_partition_based_on_simple_motion = DIRECT_PRED;
  } else {
    sf->use_square_partition_only_threshold = BLOCK_128X128;
    sf->auto_max_partition_based_on_simple_motion =
        hd_or_larger ? ADAPT_PRED : RELAXED_PRED;
  }

  // TODO(huisu@google.com): train models for 720P and above.
  if (!hd_or_larger) {
    for (i = 0; i < 5; ++i) {
      sf->ml_partition_search_breakout_thresh[i] = base_breakout[i];
    }
  }

  if (hd_or_larger && speed >= CONFIG_2PASS_PARTITION_SEARCH_LVL_START &&
      speed < CONFIG_2PASS_PARTITION_SEARCH_LVL_END) {
    sf->two_pass_partition_search = 1;
  }

  if (speed < 1) return;

  // Speed 1 and above.
  sf->use_square_partition_only_threshold =
      hd_or_larger ? BLOCK_128X128
                   : (sd_or_larger ? BLOCK_64X64 : BLOCK_32X32);
  if (!hd_or_larger) {
    for (i = 0; i < 5; ++i) {
      sf->ml_partition_search_breakout_thresh[i] = speed1_breakout[i];
    }
    sf->firstpass_simple_motion_search_early_term = 1;
  }

  if (speed < 2) return;

  // Speed 2 and above.
  if (hd_or_larger) {
    sf->use_square_partition_only_threshold = BLOCK_64X64;
    sf->adaptive_pred_interp_filter = 0;
    sf->partition_search_breakout_dist_thr = (1 << 24);
    sf->partition_search_breakout_rate_thr = 120;
  } else {
    // The same threshold is used both for 480p-and-larger and for smaller
    // frames.
    // TODO(chiyotsai@google.com): Setting the threshold to BLOCK_16X16 incurs
    // a large loss (about 0.584%). Try increasing the threshold on boosted
    // frame and see if it improves the performance.
    sf->use_square_partition_only_threshold = BLOCK_32X32;
    sf->partition_search_breakout_dist_thr = (1 << 22);
    sf->partition_search_breakout_rate_thr = 100;
  }
  sf->rd_auto_partition_min_limit = set_partition_min_limit(cm);

  if (speed < 3) return;

  // Speed 3 and above.
  if (hd_or_larger) {
    sf->partition_search_breakout_dist_thr = (1 << 25);
    sf->partition_search_breakout_rate_thr = 200;
  } else {
    sf->max_intra_bsize = BLOCK_32X32;
    sf->partition_search_breakout_dist_thr = (1 << 23);
    sf->partition_search_breakout_rate_thr = 120;
  }
  sf->use_first_partition_pass_interintra_stats =
      sf->two_pass_partition_search;

  if (speed < 4) return;

  // Speed 4 and above.
  sf->partition_search_breakout_dist_thr =
      hd_or_larger ? (1 << 26) : (1 << 24);
}
188 
// Sets the frame-size-independent speed features of the "good" speed ladder.
//
//   cpi:   encoder instance; read for the frame type / refresh flags, the
//          encoder configuration (cpi->oxcf) and rate-control state (cpi->rc).
//   sf:    speed-feature set to update.
//   speed: requested speed level.
//
// The `speed >= N` sections are cumulative: a higher speed runs every lower
// section first and then overrides selected fields, so the order of the
// statements matters.
// NOTE(review): presumably used when the encoder mode is GOOD, mirroring the
// frame-size-dependent variant -- the caller is not fully visible here.
static void set_good_speed_features_framesize_independent(
    const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) {
  const AV1_COMMON *const cm = &cpi->common;
  // Non-zero for frames reported as boosted by frame_is_boosted().
  const int boosted = frame_is_boosted(cpi);
  // Boosted frames plus frames that refresh the BWD or ALT2 reference.
  const int is_boosted_arf2_bwd_type =
      boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame;

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->reduce_inter_modes = 1;
  sf->prune_ext_partition_types_search_level = 1;
  sf->ml_prune_rect_partition = 1;
  sf->ml_prune_ab_partition = 1;
  sf->ml_prune_4_partition = 1;
  sf->simple_motion_search_prune_rect = 1;
  sf->adaptive_txb_search_level = 1;
  sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
  sf->model_based_prune_tx_search_level = 1;
  sf->model_based_post_interp_filter_breakout = 1;
  sf->model_based_motion_mode_rd_breakout = 1;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_mode_rd_model_estimation = 1;
  sf->inter_mode_rd_model_estimation_adaptive = 0;

  sf->two_loop_comp_search = 0;
  // 0 for boosted frames, 1 for non-boosted frames that refresh BWD/ALT2,
  // 2 for all remaining frames.
  sf->prune_ref_frame_for_rect_partitions =
      boosted ? 0 : (is_boosted_arf2_bwd_type ? 1 : 2);
  sf->less_rectangular_check_level = 1;
  sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
  sf->gm_disable_recode = 1;
  sf->use_fast_interpolation_filter_search = 1;
  sf->intra_tx_size_search_init_depth_sqr = 1;
  sf->intra_angle_estimation = 1;
  sf->selective_ref_frame = 1;
  sf->prune_wedge_pred_diff_based = 1;
  sf->disable_wedge_search_var_thresh = 0;
  sf->disable_wedge_search_edge_thresh = 0;
  sf->prune_motion_mode_level = 1;
  sf->cb_pred_filter_search = 0;
  sf->use_nonrd_pick_mode = 0;
  sf->use_real_time_ref_set = 0;

  // Speed 1 and above: overrides applied on top of the speed 0 settings.
  if (speed >= 1) {
    sf->gm_erroradv_type = GM_ERRORADV_TR_1;
    sf->selective_ref_frame = 2;

    sf->intra_tx_size_search_init_depth_rect = 1;
    sf->tx_size_search_lgr_block = 1;

    sf->prune_ext_partition_types_search_level = 2;
    sf->skip_repeat_interpolation_filter_search = 1;
    sf->tx_type_search.skip_tx_search = 1;
    sf->tx_type_search.ml_tx_split_thresh = 40;
    sf->model_based_prune_tx_search_level = 0;
    sf->adaptive_txb_search_level = 2;
    sf->use_intra_txb_hash = 1;
    sf->optimize_b_precheck = 1;
    sf->dual_sgr_penalty_level = 1;
    sf->use_accurate_subpel_search = USE_4_TAPS;
    sf->reuse_inter_intra_mode = 1;
    sf->prune_comp_search_by_single_result = 1;
    sf->skip_repeated_newmv = 1;
    sf->obmc_full_pixel_search_level = 1;
    // TODO(anyone): Following speed feature will be further explored to
    // identify the appropriate tradeoff between encoder performance and its
    // speed.
    sf->prune_single_motion_modes_by_simple_trans = 1;

    sf->simple_motion_search_split_only = 1;
    sf->simple_motion_search_early_term_none = 1;

    // Same values as already set at speed 0 (both thresholds stay 0).
    sf->disable_wedge_search_var_thresh = 0;
    sf->disable_wedge_search_edge_thresh = 0;
    sf->disable_interinter_wedge_newmv_search = boosted ? 0 : 1;
    sf->prune_comp_type_by_comp_avg = 1;
    sf->prune_motion_mode_level = 2;
    sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
    sf->cb_pred_filter_search = 1;
    sf->use_transform_domain_distortion = boosted ? 0 : 1;
    sf->perform_coeff_opt = boosted ? 0 : 1;
    sf->use_inter_txb_hash = 0;
  }

  // Speed 2 and above.
  if (speed >= 2) {
    sf->gm_erroradv_type = GM_ERRORADV_TR_2;

    sf->selective_ref_frame = 3;
    sf->inter_tx_size_search_init_depth_rect = 1;
    sf->inter_tx_size_search_init_depth_sqr = 1;

    sf->fast_cdef_search = 1;

    sf->adaptive_rd_thresh = 1;
    sf->mv.auto_mv_step_size = 1;
    sf->mv.subpel_iters_per_step = 1;
    sf->disable_filter_search_var_thresh = 100;
    sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;

    sf->partition_search_breakout_rate_thr = 80;
    sf->allow_partition_search_skip = 1;
    sf->disable_wedge_search_var_thresh = 100;
    sf->disable_wedge_search_edge_thresh = 0;
    // Unconditional here, unlike the boosted-dependent value at speed 1.
    sf->disable_interinter_wedge_newmv_search = 1;
    sf->fast_wedge_sign_estimate = 1;
    sf->disable_dual_filter = 1;
    sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    sf->prune_comp_type_by_comp_avg = 2;
    // Resets the value of 1 set at speed 1; speed 4 sets it to 1 again.
    // TODO(Sachin): Enable/Enhance this speed feature for speed 2 & 3
    sf->cb_pred_filter_search = 0;
    sf->adaptive_interp_filter_search = 1;
    sf->perform_coeff_opt = boosted ? 0 : 2;
  }

  // Speed 3 and above.
  if (speed >= 3) {
    sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
    sf->less_rectangular_check_level = 2;
    sf->adaptive_pred_interp_filter = 1;
    // adaptive_motion_search breaks encoder multi-thread tests.
    // The values in x->pred_mv[] differ for single and multi-thread cases.
    // See aomedia:1778.
    // sf->adaptive_motion_search = 1;
    sf->recode_loop = ALLOW_RECODE_KFARFGF;
    sf->use_transform_domain_distortion = boosted ? 1 : 2;
    sf->use_accurate_subpel_search = USE_2_TAPS;
    sf->adaptive_rd_thresh = 2;
    // Only written when smooth inter-intra is enabled in the configuration.
    // The condition is the same expression as is_boosted_arf2_bwd_type.
    if (cpi->oxcf.enable_smooth_interintra) {
      sf->disable_smooth_interintra =
          (boosted || cpi->refresh_bwd_ref_frame || cpi->refresh_alt2_ref_frame)
              ? 0
              : 1;
    }
    sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
    sf->gm_search_type = GM_DISABLE_SEARCH;
    sf->prune_comp_search_by_single_result = 2;
    sf->prune_motion_mode_level = boosted ? 2 : 3;
    sf->prune_warp_using_wmtype = 1;
    // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
    // it with cpi->sf.disable_wedge_search_var_thresh.
    sf->disable_wedge_interintra_search = 1;
    // TODO(any): Experiment with the early exit mechanism for speeds 0, 1 and 2
    // and clean-up the speed feature
    sf->perform_best_rd_based_gating_for_chroma = 1;
    // Replaces the speed 0 rule: 0 on intra-only frames, otherwise 1 for
    // boosted frames and 2 for the rest.
    sf->prune_ref_frame_for_rect_partitions =
        frame_is_intra_only(&cpi->common) ? 0 : (boosted ? 1 : 2);
    sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 3;
    sf->prune_comp_type_by_model_rd = boosted ? 0 : 1;
    // TODO(Venkat): Clean-up frame type dependency for
    // simple_motion_search_split_only in partition search function and set the
    // speed feature accordingly
    // TODO(Venkat): Evaluate this speed feature for speed 1 & 2
    sf->simple_motion_search_split_only =
        cm->allow_screen_content_tools ? 1 : 2;
    // Evaluates to 0 only when the frame is intra-only and
    // rc.frames_to_key == 1; it is 1 in every other case.
    sf->disable_smooth_intra =
        !frame_is_intra_only(&cpi->common) || (cpi->rc.frames_to_key != 1);
  }

  // Speed 4 and above.
  if (speed >= 4) {
    sf->use_intra_txb_hash = 0;
    sf->tx_type_search.fast_intra_tx_type_search = 1;
    sf->disable_loop_restoration_chroma =
        (boosted || cm->allow_screen_content_tools) ? 0 : 1;
    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
    // Resets the value of 1 set at speed 3.
    sf->adaptive_pred_interp_filter = 0;
    sf->cb_pred_filter_search = 1;
    sf->adaptive_mode_search = 1;
    sf->alt_ref_search_fp = 1;
    sf->skip_sharp_interp_filter_search = 1;
    sf->perform_coeff_opt = is_boosted_arf2_bwd_type ? 2 : 4;
    sf->adaptive_txb_search_level = boosted ? 2 : 3;
  }

  // Speed 5 and above.
  if (speed >= 5) {
    sf->recode_loop = ALLOW_RECODE_KFMAXBW;
    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->tx_size_search_method = USE_LARGESTALL;
    sf->mv.search_method = BIGDIA;
    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
    sf->adaptive_rd_thresh = 4;
    // No mode-search skipping on key frames; all listed flags otherwise.
    sf->mode_search_skip_flags =
        (cm->current_frame.frame_type == KEY_FRAME)
            ? 0
            : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                  FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                  FLAG_EARLY_TERMINATE;
    sf->disable_filter_search_var_thresh = 200;
    sf->use_fast_coef_costing = 1;
    sf->partition_search_breakout_rate_thr = 300;
    sf->use_transform_domain_distortion = 2;
  }

  // Speed 6 and above.
  if (speed >= 6) {
    int i;
    sf->optimize_coefficients = NO_TRELLIS_OPT;
    sf->mv.search_method = HEX;
    sf->disable_filter_search_var_thresh = 500;
    // Overwrites the per-size masks set at speed 5 for every transform size.
    for (i = 0; i < TX_SIZES; ++i) {
      sf->intra_y_mode_mask[i] = INTRA_DC;
      sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
    }
    sf->partition_search_breakout_rate_thr = 500;
    sf->mv.reduce_first_step_size = 1;
    sf->simple_model_rd_from_var = 1;
  }
  // Speed 7 and above.
  if (speed >= 7) {
    sf->default_max_partition_size = BLOCK_32X32;
    sf->default_min_partition_size = BLOCK_8X8;
    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->frame_parameter_update = 0;
    sf->mv.search_method = FAST_HEX;
    sf->partition_search_type = REFERENCE_PARTITION;
    // ORs the flag in, so on key frames (flags == 0 from speed 5) this flag
    // becomes set as well.
    sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
    // TODO(any): evaluate adaptive_mode_search=1 for speed 7 & 8
    sf->adaptive_mode_search = 2;
  }
  // Speed 8 and above.
  if (speed >= 8) {
    sf->mv.search_method = FAST_DIAMOND;
    sf->mv.subpel_force_stop = HALF_PEL;
    sf->lpf_pick = LPF_PICK_FROM_Q;
  }
}
415 
// Sets the frame-size-independent speed features of the real-time (RT) speed
// ladder.
//
//   cpi:   encoder instance; read for the frame type and the encoder
//          configuration (cpi->oxcf).
//   sf:    speed-feature set to update.
//   speed: requested speed level.
//
// The `speed >= N` sections are cumulative: a higher speed runs every lower
// section first and then overrides selected fields, so the order of the
// statements matters.
//
// TODO(kyslov): now this is very similar to
// set_good_speed_features_framesize_independent
//               except it sets non-rd flag on speed8. This function will likely
//               be modified in the future with RT-specific speed features
static void set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
                                                        SPEED_FEATURES *sf,
                                                        int speed) {
  AV1_COMMON *const cm = &cpi->common;
  // Non-zero for frames reported as boosted by frame_is_boosted().
  const int boosted = frame_is_boosted(cpi);

  // Speed 0 for all speed features that give neutral coding performance change.
  sf->reduce_inter_modes = 1;
  sf->prune_ext_partition_types_search_level = 1;
  sf->ml_prune_rect_partition = 1;
  sf->ml_prune_ab_partition = 1;
  sf->ml_prune_4_partition = 1;
  sf->adaptive_txb_search_level = 1;
  sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_SKIP_MV_SEARCH;
  sf->model_based_prune_tx_search_level = 1;
  sf->model_based_post_interp_filter_breakout = 1;
  sf->model_based_motion_mode_rd_breakout = 1;

  // TODO(debargha): Test, tweak and turn on either 1 or 2
  sf->inter_mode_rd_model_estimation = 0;
  sf->inter_mode_rd_model_estimation_adaptive = 0;
  sf->two_loop_comp_search = 0;

  // 0 for boosted frames, 1 otherwise.
  sf->prune_ref_frame_for_rect_partitions = !boosted;
  sf->less_rectangular_check_level = 1;
  sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3;
  sf->gm_disable_recode = 1;
  sf->use_fast_interpolation_filter_search = 1;
  sf->intra_tx_size_search_init_depth_sqr = 1;
  sf->intra_angle_estimation = 1;
  sf->selective_ref_frame = 1;
  sf->prune_wedge_pred_diff_based = 1;
  sf->disable_wedge_search_var_thresh = 0;
  sf->disable_wedge_search_edge_thresh = 0;
  sf->prune_motion_mode_level = 1;
  sf->cb_pred_filter_search = 0;
  sf->use_nonrd_pick_mode = 0;
  sf->use_real_time_ref_set = 0;

  // Speed 1 and above: overrides applied on top of the speed 0 settings.
  if (speed >= 1) {
    sf->gm_erroradv_type = GM_ERRORADV_TR_1;
    sf->selective_ref_frame = 2;

    sf->intra_tx_size_search_init_depth_rect = 1;
    sf->tx_size_search_lgr_block = 1;
    sf->prune_ext_partition_types_search_level = 2;
    sf->skip_repeat_interpolation_filter_search = 1;
    sf->tx_type_search.skip_tx_search = 1;
    sf->tx_type_search.ml_tx_split_thresh = 40;
    sf->model_based_prune_tx_search_level = 0;
    sf->adaptive_txb_search_level = 2;
    sf->use_intra_txb_hash = 1;
    sf->optimize_b_precheck = 1;
    sf->dual_sgr_penalty_level = 1;
    sf->use_accurate_subpel_search = USE_4_TAPS;
    sf->reuse_inter_intra_mode = 1;
    sf->prune_comp_search_by_single_result = 1;
    sf->skip_repeated_newmv = 1;
    sf->obmc_full_pixel_search_level = 1;
    // TODO(anyone): Following speed feature will be further explored to
    // identify the appropriate tradeoff between encoder performance and its
    // speed.
    sf->prune_single_motion_modes_by_simple_trans = 1;

    sf->simple_motion_search_prune_rect = 1;

    // Same values as already set at speed 0 (both thresholds stay 0).
    sf->disable_wedge_search_var_thresh = 0;
    sf->disable_wedge_search_edge_thresh = 0;
    sf->prune_comp_type_by_comp_avg = 1;
    sf->prune_motion_mode_level = 2;
    sf->gm_search_type = GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2;
    sf->cb_pred_filter_search = 1;
    sf->use_transform_domain_distortion = boosted ? 0 : 1;
  }

  // Speed 2 and above.
  if (speed >= 2) {
    sf->gm_erroradv_type = GM_ERRORADV_TR_2;

    sf->selective_ref_frame = 3;
    sf->inter_tx_size_search_init_depth_rect = 1;
    sf->inter_tx_size_search_init_depth_sqr = 1;
    sf->fast_cdef_search = 1;

    sf->adaptive_rd_thresh = 1;
    sf->mv.auto_mv_step_size = 1;
    sf->mv.subpel_iters_per_step = 1;
    sf->disable_filter_search_var_thresh = 100;
    sf->comp_inter_joint_search_thresh = BLOCK_SIZES_ALL;

    sf->partition_search_breakout_rate_thr = 80;
    sf->allow_partition_search_skip = 1;
    sf->disable_wedge_search_var_thresh = 100;
    sf->disable_wedge_search_edge_thresh = 0;
    sf->fast_wedge_sign_estimate = 1;
    sf->disable_dual_filter = 1;
    sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
    sf->prune_comp_type_by_comp_avg = 2;
    // Resets the value of 1 set at speed 1.
    sf->cb_pred_filter_search = 0;
    sf->adaptive_interp_filter_search = 1;
  }

  // Speed 3 and above.
  if (speed >= 3) {
    sf->selective_ref_frame = 4;
    sf->tx_size_search_method = boosted ? USE_FULL_RD : USE_LARGESTALL;
    sf->less_rectangular_check_level = 2;
    sf->adaptive_pred_interp_filter = 1;
    // adaptive_motion_search breaks encoder multi-thread tests.
    // The values in x->pred_mv[] differ for single and multi-thread cases.
    // See aomedia:1778.
    // sf->adaptive_motion_search = 1;
    sf->recode_loop = ALLOW_RECODE_KFARFGF;
    sf->use_transform_domain_distortion = 1;
    sf->use_accurate_subpel_search = USE_2_TAPS;
    sf->adaptive_rd_thresh = 2;
    sf->tx_type_search.prune_mode = PRUNE_2D_FAST;
    sf->gm_search_type = GM_DISABLE_SEARCH;
    sf->prune_comp_search_by_single_result = 2;
    sf->prune_motion_mode_level = boosted ? 2 : 3;
    sf->prune_warp_using_wmtype = 1;
    // TODO(yunqing): evaluate this speed feature for speed 1 & 2, and combine
    // it with cpi->sf.disable_wedge_search_var_thresh.
    sf->disable_wedge_interintra_search = 1;
  }

  // Speed 4 and above.
  if (speed >= 4) {
    sf->use_intra_txb_hash = 0;
    sf->use_mb_rd_hash = 0;
    sf->tx_type_search.fast_intra_tx_type_search = 1;
    sf->tx_type_search.fast_inter_tx_type_search = 1;
    // Replaces the boosted-based choice made at speed 3 with an
    // intra-only-based one.
    sf->tx_size_search_method =
        frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL;
    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED;
    // Resets the value of 1 set at speed 3.
    sf->adaptive_pred_interp_filter = 0;
    sf->adaptive_mode_search = 1;
    sf->alt_ref_search_fp = 1;
    sf->skip_sharp_interp_filter_search = 1;
  }

  // Speed 5 and above.
  if (speed >= 5) {
    sf->recode_loop = ALLOW_RECODE_KFMAXBW;
    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_64X64] = UV_INTRA_DC_H_V_CFL;
    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_32X32] = UV_INTRA_DC_H_V_CFL;
    sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V;
    sf->intra_uv_mode_mask[TX_16X16] = UV_INTRA_DC_H_V_CFL;
    sf->tx_size_search_method = USE_LARGESTALL;
    sf->mv.search_method = BIGDIA;
    sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
    sf->adaptive_rd_thresh = 4;
    // No mode-search skipping on key frames; all listed flags otherwise.
    sf->mode_search_skip_flags =
        (cm->current_frame.frame_type == KEY_FRAME)
            ? 0
            : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER |
                  FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR |
                  FLAG_EARLY_TERMINATE;
    sf->disable_filter_search_var_thresh = 200;
    sf->use_fast_coef_costing = 1;
    sf->partition_search_breakout_rate_thr = 300;
    sf->use_transform_domain_distortion = 2;
  }

  // Speed 6 and above.
  if (speed >= 6) {
    int i;
    sf->optimize_coefficients = NO_TRELLIS_OPT;
    sf->mv.search_method = HEX;
    sf->disable_filter_search_var_thresh = 500;
    // Overwrites the per-size masks set at speed 5 for every transform size.
    for (i = 0; i < TX_SIZES; ++i) {
      sf->intra_y_mode_mask[i] = INTRA_DC;
      sf->intra_uv_mode_mask[i] = UV_INTRA_DC_CFL;
    }
    sf->partition_search_breakout_rate_thr = 500;
    sf->mv.reduce_first_step_size = 1;
    sf->simple_model_rd_from_var = 1;
  }
  // Speed 7 and above.
  if (speed >= 7) {
    sf->default_max_partition_size = BLOCK_32X32;
    sf->default_min_partition_size = BLOCK_8X8;
    sf->intra_y_mode_mask[TX_64X64] = INTRA_DC;
    sf->intra_y_mode_mask[TX_32X32] = INTRA_DC;
    sf->frame_parameter_update = 0;
    sf->mv.search_method = FAST_HEX;
    sf->partition_search_type = REFERENCE_PARTITION;
    // ORs the flag in, so on key frames (flags == 0 from speed 5) this flag
    // becomes set as well.
    sf->mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
  }
  // Speed 8 and above: switches to variance-based partitioning and the
  // non-RD pick mode.
  if (speed >= 8) {
    sf->mv.search_method = FAST_DIAMOND;
    sf->lpf_pick = LPF_PICK_FROM_Q;
    // Replaces the speed 7 maximum partition size (BLOCK_32X32) and partition
    // search type (REFERENCE_PARTITION).
    sf->default_max_partition_size = BLOCK_128X128;
    sf->default_min_partition_size = BLOCK_8X8;
    sf->partition_search_type = VAR_BASED_PARTITION;
    sf->use_real_time_ref_set = 1;
    // Can't use LARGEST TX mode with pre-calculated partition
    // and disabled TX64
    if (!cpi->oxcf.enable_tx64) sf->tx_size_search_method = USE_FAST_RD;
    sf->use_nonrd_pick_mode = 1;
    sf->inter_mode_rd_model_estimation = 2;
  }
}
619 
// Applies the speed features that depend on the frame size.
//
//   cpi:   encoder instance; cpi->sf is updated and cpi->oxcf is read.
//   speed: requested speed level.
//
// Only GOOD mode has frame-size-dependent settings here. Independently of the
// mode, the sub-pixel motion search callback is replaced when the
// motion-vector unit test option is 1 or 2.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  const AV1EncoderConfig *const cfg = &cpi->oxcf;

  if (cfg->mode == GOOD) {
    set_good_speed_feature_framesize_dependent(cpi, &cpi->sf, speed);
  }

  // This is only used in motion vector unit test.
  if (cfg->motion_vector_unit_test == 1) {
    cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
  } else if (cfg->motion_vector_unit_test == 2) {
    cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
  }
}
634 
av1_set_speed_features_framesize_independent(AV1_COMP * cpi,int speed)635 void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
636   AV1_COMMON *const cm = &cpi->common;
637   SPEED_FEATURES *const sf = &cpi->sf;
638   MACROBLOCK *const x = &cpi->td.mb;
639   const AV1EncoderConfig *const oxcf = &cpi->oxcf;
640   int i;
641 
642   // best quality defaults
643   sf->frame_parameter_update = 1;
644   sf->mv.search_method = NSTEP;
645   sf->recode_loop = ALLOW_RECODE;
646   sf->mv.subpel_search_method = SUBPEL_TREE;
647   sf->mv.subpel_iters_per_step = 2;
648   sf->mv.subpel_force_stop = EIGHTH_PEL;
649   if (cpi->oxcf.disable_trellis_quant == 3) {
650     sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
651                                     ? NO_ESTIMATE_YRD_TRELLIS_OPT
652                                     : NO_TRELLIS_OPT;
653   } else if (cpi->oxcf.disable_trellis_quant == 2) {
654     sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf)
655                                     ? FINAL_PASS_TRELLIS_OPT
656                                     : NO_TRELLIS_OPT;
657   } else if (cpi->oxcf.disable_trellis_quant == 0) {
658     if (is_lossless_requested(&cpi->oxcf))
659       sf->optimize_coefficients = NO_TRELLIS_OPT;
660     else
661       sf->optimize_coefficients = FULL_TRELLIS_OPT;
662   } else if (cpi->oxcf.disable_trellis_quant == 1) {
663     sf->optimize_coefficients = NO_TRELLIS_OPT;
664   } else {
665     assert(0 && "Invalid disable_trellis_quant value");
666   }
667   sf->gm_erroradv_type = GM_ERRORADV_TR_0;
668   sf->mv.reduce_first_step_size = 0;
669   sf->mv.auto_mv_step_size = 0;
670   sf->comp_inter_joint_search_thresh = BLOCK_4X4;
671   sf->adaptive_rd_thresh = 0;
672   // TODO(sarahparker) Pair this with a speed setting once experiments are done
673   sf->trellis_eob_fast = 0;
674   sf->tx_size_search_method = cpi->oxcf.tx_size_search_method;
675   sf->inter_tx_size_search_init_depth_sqr = 0;
676   sf->inter_tx_size_search_init_depth_rect = 0;
677   sf->intra_tx_size_search_init_depth_rect = 0;
678   sf->intra_tx_size_search_init_depth_sqr = 0;
679   sf->tx_size_search_lgr_block = 0;
680   sf->model_based_prune_tx_search_level = 0;
681   sf->model_based_post_interp_filter_breakout = 0;
682   sf->model_based_motion_mode_rd_breakout = 0;
683   sf->reduce_inter_modes = 0;
684   sf->selective_ref_gm = 1;
685   sf->adaptive_motion_search = 0;
686   sf->adaptive_pred_interp_filter = 0;
687   sf->adaptive_mode_search = 0;
688   sf->alt_ref_search_fp = 0;
689   sf->partition_search_type = SEARCH_PARTITION;
690   sf->tx_type_search.prune_mode = PRUNE_2D_ACCURATE;
691   sf->tx_type_search.ml_tx_split_thresh = 30;
692   sf->tx_type_search.use_skip_flag_prediction = 1;
693   sf->tx_type_search.fast_intra_tx_type_search = 0;
694   sf->tx_type_search.fast_inter_tx_type_search = 0;
695   sf->tx_type_search.skip_tx_search = 0;
696   sf->selective_ref_frame = 0;
697   sf->less_rectangular_check_level = 0;
698   sf->use_square_partition_only_threshold = BLOCK_128X128;
699   sf->prune_ref_frame_for_rect_partitions = 0;
700   sf->auto_max_partition_based_on_simple_motion = NOT_IN_USE;
701   sf->auto_min_partition_based_on_simple_motion = 0;
702   sf->rd_auto_partition_min_limit = BLOCK_4X4;
703   sf->default_max_partition_size = BLOCK_LARGEST;
704   sf->default_min_partition_size = BLOCK_4X4;
705   sf->adjust_partitioning_from_last_frame = 0;
706   sf->mode_search_skip_flags = 0;
707   sf->disable_filter_search_var_thresh = 0;
708   sf->allow_partition_search_skip = 0;
709   sf->use_accurate_subpel_search = USE_8_TAPS;
710   sf->disable_wedge_search_edge_thresh = 0;
711   sf->use_first_partition_pass_interintra_stats = 0;
712   sf->disable_wedge_search_var_thresh = 0;
713   sf->disable_loop_restoration_chroma = 0;
714   sf->fast_wedge_sign_estimate = 0;
715   sf->prune_wedge_pred_diff_based = 0;
716   sf->drop_ref = 0;
717   sf->skip_intra_in_interframe = 1;
718   sf->txb_split_cap = 1;
719   sf->adaptive_txb_search_level = 0;
720   sf->two_pass_partition_search = 0;
721   sf->firstpass_simple_motion_search_early_term = 0;
722   sf->use_intra_txb_hash = 0;
723   sf->use_inter_txb_hash = 1;
724   sf->use_mb_rd_hash = 1;
725   sf->optimize_b_precheck = 0;
726   sf->two_loop_comp_search = 1;
727   sf->second_loop_comp_fast_tx_search = 0;
728   sf->use_dist_wtd_comp_flag = DIST_WTD_COMP_ENABLED;
729   sf->reuse_inter_intra_mode = 0;
730   sf->intra_angle_estimation = 0;
731   sf->skip_obmc_in_uniform_mv_field = 0;
732   sf->skip_wm_in_uniform_mv_field = 0;
733   sf->adaptive_interp_filter_search = 0;
734 
735   for (i = 0; i < TX_SIZES; i++) {
736     sf->intra_y_mode_mask[i] = INTRA_ALL;
737     sf->intra_uv_mode_mask[i] = UV_INTRA_ALL;
738   }
739   sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE;
740   sf->use_fast_coef_costing = 0;
741   sf->max_intra_bsize = BLOCK_LARGEST;
742   // This setting only takes effect when partition_search_type is set
743   // to FIXED_PARTITION.
744   sf->always_this_block_size = BLOCK_16X16;
745   // Recode loop tolerance %.
746   sf->recode_tolerance = 25;
747   sf->partition_search_breakout_dist_thr = 0;
748   sf->partition_search_breakout_rate_thr = 0;
749   sf->simple_model_rd_from_var = 0;
750   sf->prune_ext_partition_types_search_level = 0;
751   sf->ml_prune_rect_partition = 0;
752   sf->ml_prune_ab_partition = 0;
753   sf->ml_prune_4_partition = 0;
754   sf->fast_cdef_search = 0;
755   for (i = 0; i < PARTITION_BLOCK_SIZES; ++i) {
756     sf->ml_partition_search_breakout_thresh[i] = -1;  // -1 means not enabled.
757   }
758   sf->simple_motion_search_split_only = 0;
759   sf->simple_motion_search_prune_rect = 0;
760   sf->simple_motion_search_early_term_none = 0;
761 
762   // Set this at the appropriate speed levels
763   sf->use_transform_domain_distortion = 0;
764   sf->gm_search_type = GM_FULL_SEARCH;
765   sf->gm_disable_recode = 0;
766   sf->use_fast_interpolation_filter_search = 0;
767   sf->disable_dual_filter = 0;
768   sf->skip_repeat_interpolation_filter_search = 0;
769   sf->use_hash_based_trellis = 0;
770   sf->prune_comp_search_by_single_result = 0;
771   sf->skip_repeated_newmv = 0;
772   sf->prune_single_motion_modes_by_simple_trans = 0;
773 
  // Set decoder side speed feature to use fewer dual sgr modes
775   sf->dual_sgr_penalty_level = 0;
776 
777   // TODO(angiebird, debargha): Re-evaluate the impact of
778   // inter_mode_rd_model_estimation in conjunction with
779   // model_based_motion_mode_rd_breakout
780   sf->inter_mode_rd_model_estimation = 0;
781   sf->inter_mode_rd_model_estimation_adaptive = 0;
782 
783   sf->obmc_full_pixel_search_level = 0;
784   sf->skip_sharp_interp_filter_search = 0;
785   sf->prune_comp_type_by_comp_avg = 0;
786   sf->disable_interinter_wedge_newmv_search = 0;
787   sf->disable_smooth_interintra = 0;
788   sf->prune_motion_mode_level = 0;
789   sf->prune_warp_using_wmtype = 0;
790   sf->disable_wedge_interintra_search = 0;
791   sf->perform_coeff_opt = 0;
792   sf->prune_comp_type_by_model_rd = 0;
793   sf->disable_smooth_intra = 0;
794   sf->perform_best_rd_based_gating_for_chroma = 0;
795 
796   if (oxcf->mode == GOOD)
797     set_good_speed_features_framesize_independent(cpi, sf, speed);
798   else if (oxcf->mode == REALTIME)
799     set_rt_speed_features_framesize_independent(cpi, sf, speed);
800 
801   if (!cpi->seq_params_locked) {
802     cpi->common.seq_params.enable_dual_filter &= !sf->disable_dual_filter;
803   }
804 
805   // sf->partition_search_breakout_dist_thr is set assuming max 64x64
806   // blocks. Normalise this if the blocks are bigger.
807   if (MAX_SB_SIZE_LOG2 > 6) {
808     sf->partition_search_breakout_dist_thr <<= 2 * (MAX_SB_SIZE_LOG2 - 6);
809   }
810 
811   cpi->diamond_search_sad = av1_diamond_search_sad;
812 
813   sf->allow_exhaustive_searches = 1;
814 
815   const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
816   if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION)
817     sf->exhaustive_searches_thresh = (1 << 24);
818   else
819     sf->exhaustive_searches_thresh = (1 << 25);
820   sf->max_exaustive_pct = good_quality_max_mesh_pct[mesh_speed];
821   if (mesh_speed > 0)
822     sf->exhaustive_searches_thresh = sf->exhaustive_searches_thresh << 1;
823 
824   for (i = 0; i < MAX_MESH_STEP; ++i) {
825     sf->mesh_patterns[i].range =
826         good_quality_mesh_patterns[mesh_speed][i].range;
827     sf->mesh_patterns[i].interval =
828         good_quality_mesh_patterns[mesh_speed][i].interval;
829   }
830   if ((frame_is_intra_only(cm) && cm->allow_screen_content_tools) &&
831       (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION ||
832        cpi->oxcf.content == AOM_CONTENT_SCREEN)) {
833     for (i = 0; i < MAX_MESH_STEP; ++i) {
834       sf->mesh_patterns[i].range = intrabc_mesh_patterns[mesh_speed][i].range;
835       sf->mesh_patterns[i].interval =
836           intrabc_mesh_patterns[mesh_speed][i].interval;
837     }
838     sf->max_exaustive_pct = intrabc_max_mesh_pct[mesh_speed];
839   }
840 
841   // Slow quant, dct and trellis not worthwhile for first pass
842   // so make sure they are always turned off.
843   if (oxcf->pass == 1) sf->optimize_coefficients = NO_TRELLIS_OPT;
844 
845   // No recode or trellis for 1 pass.
846   if (oxcf->pass == 0) {
847     sf->recode_loop = DISALLOW_RECODE;
848     sf->optimize_coefficients = NO_TRELLIS_OPT;
849   }
850   // FIXME: trellis not very efficient for quantization matrices
851   if (oxcf->using_qm) sf->optimize_coefficients = NO_TRELLIS_OPT;
852 
853   if (sf->mv.subpel_search_method == SUBPEL_TREE) {
854     cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree;
855   } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
856     cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned;
857   } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
858     cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_more;
859   } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
860     cpi->find_fractional_mv_step = av1_find_best_sub_pixel_tree_pruned_evenmore;
861   }
862 
863   x->min_partition_size = sf->default_min_partition_size;
864   x->max_partition_size = sf->default_max_partition_size;
865 
  // This is only used in the motion vector unit test.
867   if (cpi->oxcf.motion_vector_unit_test == 1)
868     cpi->find_fractional_mv_step = av1_return_max_sub_pixel_mv;
869   else if (cpi->oxcf.motion_vector_unit_test == 2)
870     cpi->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
871   cpi->max_comp_type_rd_threshold_mul =
872       comp_type_rd_threshold_mul[sf->prune_comp_type_by_comp_avg];
873   cpi->max_comp_type_rd_threshold_div =
874       comp_type_rd_threshold_div[sf->prune_comp_type_by_comp_avg];
875   const int tx_domain_speed = AOMMIN(speed, MAX_TX_DOMAIN_EVAL_SPEED);
876   cpi->tx_domain_dist_threshold = tx_domain_dist_thresholds[tx_domain_speed];
877 
878   // assert ensures that coeff_opt_dist_thresholds is accessed correctly
879   assert(cpi->sf.perform_coeff_opt >= 0 && cpi->sf.perform_coeff_opt < 5);
880   cpi->coeff_opt_dist_threshold =
881       coeff_opt_dist_thresholds[cpi->sf.perform_coeff_opt];
882 
883 #if CONFIG_DIST_8X8
884   if (sf->use_transform_domain_distortion > 0) cpi->oxcf.using_dist_8x8 = 0;
885 
886   if (cpi->oxcf.using_dist_8x8) x->min_partition_size = BLOCK_8X8;
887 #endif  // CONFIG_DIST_8X8
888   if (cpi->oxcf.row_mt == 1 && (cpi->oxcf.max_threads > 1)) {
889     sf->adaptive_rd_thresh = 0;
890     if (sf->inter_mode_rd_model_estimation == 1) {
891       sf->inter_mode_rd_model_estimation = 0;
892       sf->inter_mode_rd_model_estimation_adaptive = 0;
893     }
894   }
895 }
896