1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #ifndef AOM_AV1_ENCODER_RD_H_
13 #define AOM_AV1_ENCODER_RD_H_
14 
15 #include <limits.h>
16 
17 #include "av1/common/blockd.h"
18 
19 #include "av1/encoder/block.h"
20 #include "av1/encoder/context_tree.h"
21 #include "av1/encoder/cost.h"
22 
23 #ifdef __cplusplus
24 extern "C" {
25 #endif
26 
// Number of bits by which the distortion term is scaled up (multiplied by
// 1 << RDDIV_BITS) in the RDCOST* macros below.
#define RDDIV_BITS 7
// Right shift applied to rdmult to derive errorperbit (see
// set_error_per_bit()).
#define RD_EPB_SHIFT 6

// Combined rate-distortion cost for rate R, multiplier RM and distortion D:
// the product R * RM (formed in 64 bits) is passed through ROUND_POWER_OF_TWO
// with AV1_PROB_COST_SHIFT, and D * (1 << RDDIV_BITS) is added to it.
// ROUND_POWER_OF_TWO and AV1_PROB_COST_SHIFT are defined elsewhere.
// av1_calculate_rd_cost() uses this form when the rate is non-negative.
#define RDCOST(RM, R, D)                                            \
  (ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT) + \
   ((D) * (1 << RDDIV_BITS)))

// Same terms as RDCOST, but the rate term is subtracted from the scaled
// distortion instead of added. av1_calculate_rd_cost() uses this form for a
// negative rate, passing the negated rate as R.
#define RDCOST_NEG_R(RM, R, D) \
  (((D) * (1 << RDDIV_BITS)) - \
   ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), AV1_PROB_COST_SHIFT))

// Double-precision variant of RDCOST: the rate term is divided by
// (1 << AV1_PROB_COST_SHIFT) in floating point rather than going through
// ROUND_POWER_OF_TWO.
#define RDCOST_DBL(RM, R, D)                                       \
  (((((double)(R)) * (RM)) / (double)(1 << AV1_PROB_COST_SHIFT)) + \
   ((double)(D) * (1 << RDDIV_BITS)))
41 
// NOTE(review): the three constants below are not referenced anywhere in this
// header; their exact meaning has to be taken from the code that uses them.
// Presumably QIDX_SKIP_THRESH is a quantizer-index threshold and the
// MV_COST_WEIGHT* values weight motion vector costs -- confirm.
#define QIDX_SKIP_THRESH 115

#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120
46 
// The fractional part of rd_thresh factor is stored with 5 bits. The maximum
// factor that we allow is two, which is stored as 2 ** (5+1) = 64
#define RD_THRESH_FAC_FRAC_BITS (5)
// Fixed-point representation of a factor of one (1 << 5 = 32).
// reset_thresh_freq_fact() initializes every factor to this value.
#define RD_THRESH_FAC_FRAC_VAL (1 << (RD_THRESH_FAC_FRAC_BITS))
// Fixed-point representation of the maximum factor, two (32 << 1 = 64).
#define RD_THRESH_MAX_FACT ((RD_THRESH_FAC_FRAC_VAL) << 1)
// NOTE(review): the next two constants are not used in this header;
// presumably they control how fast a factor decreases / increases when the
// factors are adapted (see av1_update_rd_thresh_fact()) -- confirm.
#define RD_THRESH_LOG_DEC_FACTOR (4)
#define RD_THRESH_INC (1)

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
57 
// Evaluation stages distinguished by the winner mode framework. The values are
// used as array indices, e.g. by get_rd_opt_coeff_thresh().
enum {
  // Default initialization when we are not using winner mode framework. e.g.
  // intrabc
  DEFAULT_EVAL = 0,
  // Initialization for selecting winner mode
  MODE_EVAL,
  // Initialization for winner mode evaluation
  WINNER_MODE_EVAL,
  // Number of mode evaluation types (this enumerator must stay last).
  MODE_EVAL_TYPES,
} UENUM1BYTE(MODE_EVAL_TYPE);
69 
// Rate-distortion optimization state.
typedef struct RD_OPT {
  // Thresh_mult is used to set a threshold for the rd score. A higher value
  // means that we will accept the best mode so far more often. This number
  // is used in combination with the current block size, and thresh_freq_fact
  // to pick a threshold.
  int thresh_mult[MAX_MODES];

  // Thresholds indexed by [segment][block size][mode].
  // NOTE(review): presumably derived from thresh_mult -- confirm in the
  // implementation.
  int threshes[MAX_SEGMENTS][BLOCK_SIZES_ALL][MAX_MODES];

  // NOTE(review): presumably the multiplier supplied as RM / mult to the
  // RDCOST macros and av1_calculate_rd_cost() -- confirm.
  int RDMULT;

  // NOTE(review): the meaning of the fields below is not evident from this
  // header; see the code that writes them.
  double r0, arf_r0;
  double mc_saved_base, mc_count_base;
} RD_OPT;
84 
// Costs for signaling motion vectors.
// NOTE(review): the type name suggests these are the costs used for intra
// block copy (IntraBC) vectors -- confirm.
typedef struct {
  // Cost of transmitting the actual motion vector.
  // mv_component[0][i] is the cost of a motion vector whose mv_row component
  // equals i - MV_MAX.
  // mv_component[1][i] is the cost of a motion vector whose mv_col component
  // equals i - MV_MAX.
  // NOTE(review): this comment previously labelled mv_row "horizontal" and
  // mv_col "vertical", the reverse of the usual row/column meaning. The labels
  // were dropped -- confirm which index maps to which direction.
  int mv_component[2][MV_VALS];

  // joint_mv[i] is the cost of transmitting joint mv(MV_JOINT_TYPE) of
  // type i.
  // TODO(huisu@google.com): we can update dv_joint_cost per SB.
  int joint_mv[MV_JOINTS];
} IntraBCMVCosts;
98 
av1_init_rd_stats(RD_STATS * rd_stats)99 static INLINE void av1_init_rd_stats(RD_STATS *rd_stats) {
100 #if CONFIG_RD_DEBUG
101   int plane;
102 #endif
103   rd_stats->rate = 0;
104   rd_stats->dist = 0;
105   rd_stats->rdcost = 0;
106   rd_stats->sse = 0;
107   rd_stats->skip = 1;
108   rd_stats->zero_rate = 0;
109 #if CONFIG_RD_DEBUG
110   // This may run into problems when monochrome video is
111   // encoded, as there will only be 1 plane
112   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
113     rd_stats->txb_coeff_cost[plane] = 0;
114     {
115       int r, c;
116       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
117         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
118           rd_stats->txb_coeff_cost_map[plane][r][c] = 0;
119     }
120   }
121 #endif
122 }
123 
av1_invalid_rd_stats(RD_STATS * rd_stats)124 static INLINE void av1_invalid_rd_stats(RD_STATS *rd_stats) {
125 #if CONFIG_RD_DEBUG
126   int plane;
127 #endif
128   rd_stats->rate = INT_MAX;
129   rd_stats->dist = INT64_MAX;
130   rd_stats->rdcost = INT64_MAX;
131   rd_stats->sse = INT64_MAX;
132   rd_stats->skip = 0;
133   rd_stats->zero_rate = 0;
134 #if CONFIG_RD_DEBUG
135   // This may run into problems when monochrome video is
136   // encoded, as there will only be 1 plane
137   for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
138     rd_stats->txb_coeff_cost[plane] = INT_MAX;
139     {
140       int r, c;
141       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
142         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c)
143           rd_stats->txb_coeff_cost_map[plane][r][c] = INT16_MAX;
144     }
145   }
146 #endif
147 }
148 
av1_merge_rd_stats(RD_STATS * rd_stats_dst,const RD_STATS * rd_stats_src)149 static INLINE void av1_merge_rd_stats(RD_STATS *rd_stats_dst,
150                                       const RD_STATS *rd_stats_src) {
151   assert(rd_stats_dst->rate != INT_MAX && rd_stats_src->rate != INT_MAX);
152   rd_stats_dst->rate = (int)AOMMIN(
153       ((int64_t)rd_stats_dst->rate + (int64_t)rd_stats_src->rate), INT_MAX);
154   if (!rd_stats_dst->zero_rate)
155     rd_stats_dst->zero_rate = rd_stats_src->zero_rate;
156   rd_stats_dst->dist += rd_stats_src->dist;
157   rd_stats_dst->sse += rd_stats_src->sse;
158   rd_stats_dst->skip &= rd_stats_src->skip;
159 #if CONFIG_RD_DEBUG
160   // This may run into problems when monochrome video is
161   // encoded, as there will only be 1 plane
162   for (int plane = 0; plane < MAX_MB_PLANE; ++plane) {
163     rd_stats_dst->txb_coeff_cost[plane] += rd_stats_src->txb_coeff_cost[plane];
164     {
165       // TODO(angiebird): optimize this part
166       int r, c;
167       int ref_txb_coeff_cost = 0;
168       for (r = 0; r < TXB_COEFF_COST_MAP_SIZE; ++r)
169         for (c = 0; c < TXB_COEFF_COST_MAP_SIZE; ++c) {
170           rd_stats_dst->txb_coeff_cost_map[plane][r][c] +=
171               rd_stats_src->txb_coeff_cost_map[plane][r][c];
172           ref_txb_coeff_cost += rd_stats_dst->txb_coeff_cost_map[plane][r][c];
173         }
174       assert(ref_txb_coeff_cost == rd_stats_dst->txb_coeff_cost[plane]);
175     }
176   }
177 #endif
178 }
179 
av1_accumulate_rd_stats(RD_STATS * rd_stats,int64_t dist,int rate,int skip,int64_t sse,int zero_rate)180 static INLINE void av1_accumulate_rd_stats(RD_STATS *rd_stats, int64_t dist,
181                                            int rate, int skip, int64_t sse,
182                                            int zero_rate) {
183   assert(rd_stats->rate != INT_MAX && rate != INT_MAX);
184   rd_stats->rate += rate;
185   if (!rd_stats->zero_rate) rd_stats->zero_rate = zero_rate;
186   rd_stats->dist += dist;
187   rd_stats->skip &= skip;
188   rd_stats->sse += sse;
189 }
190 
av1_calculate_rd_cost(int mult,int rate,int64_t dist)191 static INLINE int64_t av1_calculate_rd_cost(int mult, int rate, int64_t dist) {
192   assert(mult >= 0);
193   if (rate >= 0) {
194     return RDCOST(mult, rate, dist);
195   }
196   return RDCOST_NEG_R(mult, -rate, dist);
197 }
198 
av1_rd_cost_update(int mult,RD_STATS * rd_cost)199 static INLINE void av1_rd_cost_update(int mult, RD_STATS *rd_cost) {
200   if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX &&
201       rd_cost->rdcost < INT64_MAX) {
202     rd_cost->rdcost = av1_calculate_rd_cost(mult, rd_cost->rate, rd_cost->dist);
203   } else {
204     av1_invalid_rd_stats(rd_cost);
205   }
206 }
207 
av1_rd_stats_subtraction(int mult,const RD_STATS * const left,const RD_STATS * const right,RD_STATS * result)208 static INLINE void av1_rd_stats_subtraction(int mult,
209                                             const RD_STATS *const left,
210                                             const RD_STATS *const right,
211                                             RD_STATS *result) {
212   if (left->rate == INT_MAX || right->rate == INT_MAX ||
213       left->dist == INT64_MAX || right->dist == INT64_MAX ||
214       left->rdcost == INT64_MAX || right->rdcost == INT64_MAX) {
215     av1_invalid_rd_stats(result);
216   } else {
217     result->rate = left->rate - right->rate;
218     result->dist = left->dist - right->dist;
219     result->rdcost = av1_calculate_rd_cost(mult, result->rate, result->dist);
220   }
221 }
222 
// Forward declarations of struct tags defined elsewhere. Of these, only
// struct AV1_COMP is referenced (by pointer) by the prototypes in this header.
struct TileInfo;
struct TileDataEnc;
struct AV1_COMP;
struct macroblock;
227 
// The functions declared below are implemented outside this header; only
// their signatures are visible here, so the notes are hedged.

// NOTE(review): presumably these two map a quantizer index to a
// rate-distortion multiplier -- confirm against the implementation.
int av1_compute_rd_mult_based_on_qindex(const struct AV1_COMP *cpi, int qindex);

int av1_compute_rd_mult(const struct AV1_COMP *cpi, int qindex);

void av1_initialize_rd_consts(struct AV1_COMP *cpi);

void av1_initialize_me_consts(const struct AV1_COMP *cpi, MACROBLOCK *x,
                              int qindex);

// NOTE(review): rate and dist are non-const pointers, presumably outputs --
// confirm.
void av1_model_rd_from_var_lapndz(int64_t var, unsigned int n,
                                  unsigned int qstep, int *rate, int64_t *dist);

// NOTE(review): rate_f and distbysse_f are non-const pointers, presumably
// outputs of the two model functions below -- confirm.
void av1_model_rd_curvfit(BLOCK_SIZE bsize, double sse_norm, double xqr,
                          double *rate_f, double *distbysse_f);
void av1_model_rd_surffit(BLOCK_SIZE bsize, double sse_norm, double xm,
                          double yl, double *rate_f, double *distbysse_f);

int av1_get_switchable_rate(const MACROBLOCK *x, const MACROBLOCKD *xd,
                            InterpFilter interp_filter);

YV12_BUFFER_CONFIG *av1_get_scaled_ref_frame(const struct AV1_COMP *cpi,
                                             int ref_frame);

void av1_init_me_luts(void);

void av1_set_mvcost(MACROBLOCK *x, int ref, int ref_mv_idx);

// NOTE(review): t_above and t_left are declared as arrays of MAX_MIB_SIZE
// entries and are non-const, presumably filled by the callee -- confirm.
void av1_get_entropy_contexts(BLOCK_SIZE plane_bsize,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[MAX_MIB_SIZE],
                              ENTROPY_CONTEXT t_left[MAX_MIB_SIZE]);

void av1_set_rd_speed_thresholds(struct AV1_COMP *cpi);

// fact points to rows of MAX_MODES ints, the same row shape as the
// thresh_freq_fact table that reset_thresh_freq_fact() fills.
// NOTE(review): presumably that table is what callers pass -- confirm.
void av1_update_rd_thresh_fact(const AV1_COMMON *const cm,
                               int (*fact)[MAX_MODES], int rd_thresh,
                               BLOCK_SIZE bsize, THR_MODES best_mode_index);
265 
reset_thresh_freq_fact(MACROBLOCK * const x)266 static INLINE void reset_thresh_freq_fact(MACROBLOCK *const x) {
267   for (int i = 0; i < BLOCK_SIZES_ALL; ++i) {
268     for (int j = 0; j < MAX_MODES; ++j) {
269       x->thresh_freq_fact[i][j] = RD_THRESH_FAC_FRAC_VAL;
270     }
271   }
272 }
273 
rd_less_than_thresh(int64_t best_rd,int thresh,int thresh_fact)274 static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
275                                       int thresh_fact) {
276   return best_rd < ((int64_t)thresh * thresh_fact >> 5) || thresh == INT_MAX;
277 }
278 
// Implemented outside this header.
// NOTE(review): presumably a motion vector prediction step over the reference
// luma buffer ref_y_buffer (stride ref_y_stride) for reference ref_frame and
// block size block_size -- confirm against the implementation.
void av1_mv_pred(const struct AV1_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame,
                 BLOCK_SIZE block_size);
282 
set_error_per_bit(MACROBLOCK * x,int rdmult)283 static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
284   x->errorperbit = rdmult >> RD_EPB_SHIFT;
285   x->errorperbit += (x->errorperbit == 0);
286 }
287 
288 // Get the threshold for R-D optimization of coefficients depending upon mode
289 // decision/winner mode processing
get_rd_opt_coeff_thresh(const uint32_t * const coeff_opt_dist_threshold,int enable_winner_mode_for_coeff_opt,int is_winner_mode)290 static INLINE uint32_t get_rd_opt_coeff_thresh(
291     const uint32_t *const coeff_opt_dist_threshold,
292     int enable_winner_mode_for_coeff_opt, int is_winner_mode) {
293   // Default initialization of threshold
294   uint32_t coeff_opt_thresh = coeff_opt_dist_threshold[DEFAULT_EVAL];
295   // TODO(any): Experiment with coeff_opt_dist_threshold values when
296   // enable_winner_mode_for_coeff_opt is ON
297   // TODO(any): Skip the winner mode processing for blocks with lower residual
298   // energy as R-D optimization of coefficients would have been enabled during
299   // mode decision
300   if (enable_winner_mode_for_coeff_opt) {
301     // Use conservative threshold during mode decision and perform R-D
302     // optimization of coeffs always for winner modes
303     if (is_winner_mode)
304       coeff_opt_thresh = coeff_opt_dist_threshold[WINNER_MODE_EVAL];
305     else
306       coeff_opt_thresh = coeff_opt_dist_threshold[MODE_EVAL];
307   }
308   return coeff_opt_thresh;
309 }
310 
311 // Used to reset the state of tx/mb rd hash information
reset_hash_records(MACROBLOCK * const x,int use_inter_txb_hash)312 static INLINE void reset_hash_records(MACROBLOCK *const x,
313                                       int use_inter_txb_hash) {
314   int32_t record_idx;
315 
316   // Reset the state for use_inter_txb_hash
317   if (use_inter_txb_hash) {
318     for (record_idx = 0;
319          record_idx < ((MAX_MIB_SIZE >> 1) * (MAX_MIB_SIZE >> 1)); record_idx++)
320       x->txb_rd_record_8X8[record_idx].num =
321           x->txb_rd_record_8X8[record_idx].index_start = 0;
322     for (record_idx = 0;
323          record_idx < ((MAX_MIB_SIZE >> 2) * (MAX_MIB_SIZE >> 2)); record_idx++)
324       x->txb_rd_record_16X16[record_idx].num =
325           x->txb_rd_record_16X16[record_idx].index_start = 0;
326     for (record_idx = 0;
327          record_idx < ((MAX_MIB_SIZE >> 3) * (MAX_MIB_SIZE >> 3)); record_idx++)
328       x->txb_rd_record_32X32[record_idx].num =
329           x->txb_rd_record_32X32[record_idx].index_start = 0;
330     for (record_idx = 0;
331          record_idx < ((MAX_MIB_SIZE >> 4) * (MAX_MIB_SIZE >> 4)); record_idx++)
332       x->txb_rd_record_64X64[record_idx].num =
333           x->txb_rd_record_64X64[record_idx].index_start = 0;
334   }
335 
336   // Reset the state for use_intra_txb_hash
337   x->txb_rd_record_intra.num = x->txb_rd_record_intra.index_start = 0;
338 
339   // Reset the state for use_mb_rd_hash
340   x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
341 }
342 
// The functions declared below are implemented outside this header; only
// their signatures are visible here, so the notes are hedged.

// NOTE(review): dst is a non-const array of MAX_MB_PLANE buf_2d entries,
// presumably filled from src for the first num_planes planes -- confirm.
void av1_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src,
                          const struct scale_factors *scale,
                          const struct scale_factors *scale_uv,
                          const int num_planes);

int av1_get_intra_cost_penalty(int qindex, int qdelta,
                               aom_bit_depth_t bit_depth);

// NOTE(review): the three av1_fill_* functions presumably populate cost
// tables inside x from the frame context fc -- confirm.
void av1_fill_mode_rates(AV1_COMMON *const cm, MACROBLOCK *x,
                         FRAME_CONTEXT *fc);

void av1_fill_coeff_costs(MACROBLOCK *x, FRAME_CONTEXT *fc,
                          const int num_planes);

void av1_fill_mv_costs(const FRAME_CONTEXT *fc, int integer_mv, int usehp,
                       MACROBLOCK *x);

int av1_get_adaptive_rdmult(const struct AV1_COMP *cpi, double beta);

int av1_get_deltaq_offset(const struct AV1_COMP *cpi, int qindex, double beta);
365 
366 #ifdef __cplusplus
367 }  // extern "C"
368 #endif
369 
370 #endif  // AOM_AV1_ENCODER_RD_H_
371