1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32 
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 
// Exponent applied to the scaled dc quantizer value in
// compute_rd_thresh_factor().
#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters. It multiplies the
// cost returned by vp9_get_switchable_rate().
#define SWITCHABLE_INTERP_RATE_FACTOR 1
47 
// Sets every field of |rd_cost| to the largest value used as a sentinel here:
// INT_MAX for the rate and INT64_MAX for the distortion and the rd cost.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
53 
// Zeroes the rate, distortion and rd cost stored in |rd_cost|.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
59 
// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size; it has one entry per block
// size and is indexed by bsize in set_block_thresholds().
// The factors are stored scaled by 4 (i.e. << 2), so 2 means x0.5 and
// 32 means x8; set_block_thresholds() divides by 4 after multiplying.
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
  2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
};
67 
// Converts mode-related probabilities into the cost tables stored in |cpi|:
// key-frame and inter-frame intra mode costs, switchable interpolation filter
// costs and transform size costs. Probabilities that are not fixed tables are
// read from the current frame context (cpi->common.fc).
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col, mode, ctx, max_tx;

  // Luma intra mode costs from the fixed key-frame probability table.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col], vp9_kf_y_mode_prob[row][col],
                      vp9_intra_mode_tree);
    }
  }

  // Luma intra mode costs from entry 1 of the frame context's y_mode_prob.
  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Chroma intra mode costs: fixed table for key frames, frame context for
  // inter frames.
  for (mode = 0; mode < INTRA_MODES; ++mode) {
    vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][mode],
                    vp9_kf_uv_mode_prob[mode], vp9_intra_mode_tree);
    vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][mode],
                    fc->uv_mode_prob[mode], vp9_intra_mode_tree);
  }

  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }

  // Transform size costs. For a largest allowed size |max_tx|, choosing size
  // |tx| costs a "one" for each of the first |tx| probabilities, followed by
  // a terminating "zero" unless |tx| is already the largest allowed size.
  for (max_tx = TX_8X8; max_tx < TX_SIZES; ++max_tx) {
    for (ctx = 0; ctx < TX_SIZE_CONTEXTS; ++ctx) {
      const vpx_prob *tx_probs = get_tx_probs(max_tx, ctx, &fc->tx_probs);
      int tx;
      for (tx = 0; tx <= max_tx; ++tx) {
        int cost = 0;
        int m;
        for (m = 0; m < tx; ++m) {
          cost += vp9_cost_one(tx_probs[m]);
        }
        if (tx != max_tx) {
          cost += vp9_cost_zero(tx_probs[tx]);
        }
        cpi->tx_size_cost[max_tx - 1][ctx][tx] = cost;
      }
    }
  }
}
110 
// Fills the coefficient token cost table |c| from the model probabilities
// |p|. For every transform size, plane type, reference type, band and
// context the model probabilities are expanded to a full probability set,
// which is then costed twice: into slot [0] with vp9_cost_tokens() and into
// slot [1] with vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx;
  int plane, ref, band, ctx;

  for (tx = TX_4X4; tx <= TX_32X32; ++tx) {
    for (plane = 0; plane < PLANE_TYPES; ++plane) {
      for (ref = 0; ref < REF_TYPES; ++ref) {
        for (band = 0; band < COEF_BANDS; ++band) {
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(p[tx][plane][ref][band][ctx], full_probs);
            vp9_cost_tokens((int *)c[tx][plane][ref][band][0][ctx], full_probs,
                            vp9_coef_tree);
            vp9_cost_tokens_skip((int *)c[tx][plane][ref][band][1][ctx],
                                 full_probs, vp9_coef_tree);
            // Both variants are expected to agree on the EOB token cost.
            assert(c[tx][plane][ref][band][0][ctx][EOB_TOKEN] ==
                   c[tx][plane][ref][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
129 
// SAD-per-bit lookup tables, indexed by qindex. They are filled by
// vp9_init_me_luts() from a formula in q, so the values are correlated to the
// quantizer, and read by vp9_initialize_me_consts().
static int sad_per_bit16lut_8[QINDEX_RANGE];
static int sad_per_bit4lut_8[QINDEX_RANGE];

#if CONFIG_VP9_HIGHBITDEPTH
// Equivalent tables for the 10-bit and 12-bit depths.
static int sad_per_bit16lut_10[QINDEX_RANGE];
static int sad_per_bit4lut_10[QINDEX_RANGE];
static int sad_per_bit16lut_12[QINDEX_RANGE];
static int sad_per_bit4lut_12[QINDEX_RANGE];
#endif
140 
// Fills the first |range| entries of |bit16lut| and |bit4lut| for the given
// |bit_depth|. Entry i is a linear function of the q value that
// vp9_convert_qindex_to_q() returns for qindex i, truncated to int.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex;
  // The sad lut tables are initialized with a formulaic calculation for now,
  // which makes it easier to resolve the impact of experimental changes to
  // the quantizer tables.
  for (qindex = 0; qindex < range; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
    bit16lut[qindex] = (int)(0.0418 * q + 2.4107);
    bit4lut[qindex] = (int)(0.063 * q + 2.742);
  }
}
153 
vp9_init_me_luts(void)154 void vp9_init_me_luts(void) {
155   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
156                   VPX_BITS_8);
157 #if CONFIG_VP9_HIGHBITDEPTH
158   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
159                   VPX_BITS_10);
160   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
161                   VPX_BITS_12);
162 #endif
163 }
164 
// Extra rdmult weighting used by modulate_rdmult(). The table is indexed by
// min(15, gfu_boost / 100) and each value is in units of 1/128: the selected
// fraction of rdmult is added on top of rdmult.
static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12,
                                         8,  8,  4,  4,  2,  2,  1,  0 };
167 
// Per frame-update-type rdmult scaling used by modulate_rdmult(), in units of
// 1/128 (128 = x1.0).
// Note that the element below for frame type "USE_BUF_FRAME", which indicates
// that the show frame flag is set, should not be used as no real frame
// is encoded so we should not reach here. However, a dummy value
// is inserted here to make sure the data structure has the right number
// of values assigned.
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128,
                                                              128, 144, 144 };
175 
vp9_compute_rd_mult_based_on_qindex(const VP9_COMP * cpi,int qindex)176 int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) {
177   // largest dc_quant is 21387, therefore rdmult should always fit in int32_t
178   const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
179   uint32_t rdmult = q * q;
180 
181   if (cpi->common.frame_type != KEY_FRAME) {
182     if (qindex < 128)
183       rdmult = rdmult * 4;
184     else if (qindex < 190)
185       rdmult = rdmult * 4 + rdmult / 2;
186     else
187       rdmult = rdmult * 3;
188   } else {
189     if (qindex < 64)
190       rdmult = rdmult * 4;
191     else if (qindex <= 128)
192       rdmult = rdmult * 3 + rdmult / 2;
193     else if (qindex < 190)
194       rdmult = rdmult * 4 + rdmult / 2;
195     else
196       rdmult = rdmult * 7 + rdmult / 2;
197   }
198 #if CONFIG_VP9_HIGHBITDEPTH
199   switch (cpi->common.bit_depth) {
200     case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break;
201     case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break;
202     default: break;
203   }
204 #endif  // CONFIG_VP9_HIGHBITDEPTH
205   return rdmult > 0 ? rdmult : 1;
206 }
207 
modulate_rdmult(const VP9_COMP * cpi,int rdmult)208 static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) {
209   int64_t rdmult_64 = rdmult;
210   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
211     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
212     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
213     const int gfu_boost = cpi->multi_layer_arf
214                               ? gf_group->gfu_boost[gf_group->index]
215                               : cpi->rc.gfu_boost;
216     const int boost_index = VPXMIN(15, (gfu_boost / 100));
217 
218     rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7;
219     rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7);
220   }
221   return (int)rdmult_64;
222 }
223 
// Returns the rd multiplier for |qindex|: the qindex-based value passed
// through modulate_rdmult().
int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
  const int base_rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex);
  return modulate_rdmult(cpi, base_rdmult);
}
228 
vp9_get_adaptive_rdmult(const VP9_COMP * cpi,double beta)229 int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) {
230   int rdmult =
231       vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex);
232   rdmult = (int)((double)rdmult / beta);
233   rdmult = rdmult > 0 ? rdmult : 1;
234   return modulate_rdmult(cpi, rdmult);
235 }
236 
// Returns the rd threshold factor for |qindex|: the dc quantizer is divided
// by 4, 16 or 64 for 8-, 10- and 12-bit depth respectively, raised to
// RD_THRESH_POW, multiplied by 5.12 and truncated. The result is never less
// than 8. Without CONFIG_VP9_HIGHBITDEPTH the 8-bit path is always used.
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  vpx_bit_depth_t depth = VPX_BITS_8;
  double divisor = 4.0;
  double q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  if (bit_depth == VPX_BITS_10) {
    depth = VPX_BITS_10;
    divisor = 16.0;
  } else if (bit_depth != VPX_BITS_8) {
    assert(bit_depth == VPX_BITS_12);
    depth = VPX_BITS_12;
    divisor = 64.0;
  }
#else
  (void)bit_depth;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  q = vp9_dc_quant(qindex, 0, depth) / divisor;
  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(q, RD_THRESH_POW) * 5.12);
  return VPXMAX(factor, 8);
}
255 
// Loads x->sadperbit16 and x->sadperbit4 from the SAD-per-bit lookup tables
// at |qindex|. With CONFIG_VP9_HIGHBITDEPTH the tables matching the stream's
// bit depth are used; otherwise the 8-bit tables are always used.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;
#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8: break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    default:
      assert(cpi->common.bit_depth == VPX_BITS_12);
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
279 
// Fills rd->threshes for every segment and block size. Each threshold is the
// matching multiplier (rd->thresh_mult for 8x8 and larger blocks,
// rd->thresh_mult_sub8x8 for smaller ones) times a factor derived from the
// segment's qindex and the block size, divided by 4. A multiplier too large
// for that product to fit in an int yields INT_MAX instead.
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg, bsize, idx;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q = compute_rd_thresh_factor(qindex, cm->bit_depth);

    for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int t = q * rd_thresh_block_size_factor[bsize];
      const int thresh_max = INT_MAX / t;

      if (bsize < BLOCK_8X8) {
        for (idx = 0; idx < MAX_REFS; ++idx) {
          if (rd->thresh_mult_sub8x8[idx] < thresh_max) {
            rd->threshes[seg][bsize][idx] = rd->thresh_mult_sub8x8[idx] * t / 4;
          } else {
            rd->threshes[seg][bsize][idx] = INT_MAX;
          }
        }
      } else {
        for (idx = 0; idx < MAX_MODES; ++idx) {
          if (rd->thresh_mult[idx] < thresh_max) {
            rd->threshes[seg][bsize][idx] = rd->thresh_mult[idx] * t / 4;
          } else {
            rd->threshes[seg][bsize][idx] = INT_MAX;
          }
        }
      }
    }
  }
}
311 
// Sets up the per-frame rate-distortion constants held in |cpi|: RDDIV and
// RDMULT, the error-per-bit value, the transform size selection flag, the
// block thresholds and partition probabilities, and then whichever cost
// tables (motion vector, token, partition, mode, inter mode) the current
// pass and speed features call for.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
  RD_OPT *const rd = &cpi->rd;
  int ctx;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  set_error_per_bit(x, rd->RDMULT);

  // Transform size selection is turned off only when the search method is
  // USE_LARGESTALL and the frame is not a key frame.
  x->select_tx_size = !(cpi->sf.tx_size_search_method == USE_LARGESTALL &&
                        cm->frame_type != KEY_FRAME);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (cpi->oxcf.pass == 1) {
    // Pass 1 builds only the motion vector cost table.
    if (!frame_is_intra_only(cm)) {
      vp9_build_nmv_cost_table(
          x->nmvjointcost,
          cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
          &cm->fc->nmvc, cm->allow_high_precision_mv);
    }
    return;
  }

  if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) {
    fill_token_costs(x->token_costs, cm->fc->coef_probs);
  }

  if (cpi->sf.partition_search_type != VAR_BASED_PARTITION ||
      cm->frame_type == KEY_FRAME) {
    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(xd, ctx),
                      vp9_partition_tree);
    }
  }

  // In non-rd pick mode the mode costs are refreshed only on key frames and
  // on frames whose index is 1 modulo 8.
  if (cpi->sf.use_nonrd_pick_mode && (cm->current_video_frame & 0x07) != 1 &&
      cm->frame_type != KEY_FRAME) {
    return;
  }

  fill_mode_costs(cpi);

  if (frame_is_intra_only(cm)) return;

  vp9_build_nmv_cost_table(
      x->nmvjointcost,
      cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc,
      cm->allow_high_precision_mv);

  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
  }
}
368 
// NOTE: The tables below must be of the same size, because model_rd_norm()
// and model_rd_norm_vec() index all of them with the same value.

// The functions described below are sampled at the four most significant
// bits of x^2 + 8 / 256.

// Normalized rate:
// This table models the rate for a Laplacian source with given variance
// when quantized with a uniform quantizer with given stepsize. The
// closed form expression is:
// Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
// where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
// and H(x) is the binary entropy function.
// Adjacent entries are linearly interpolated by the model_rd_norm functions.
static const int rate_tab_q10[] = {
  65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044,
  3958,  3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037,
  2952,  2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179,
  2130,  2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398,
  1342,  1290, 1243, 1199, 1159, 1086, 1021, 963,  911,  864,  821,  781,  745,
  680,   623,  574,  530,  490,  455,  424,  395,  345,  304,  269,  239,  213,
  190,   171,  154,  126,  104,  87,   73,   61,   52,   44,   38,   28,   21,
  16,    12,   10,   8,    6,    5,    3,    2,    1,    1,    1,    0,    0,
};
391 
// Normalized distortion:
// This table models the normalized distortion for a Laplacian source
// with given variance when quantized with a uniform quantizer
// with given stepsize. The closed form expression is:
// Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
// where x = qpstep / sqrt(variance).
// Note the actual distortion is Dn * variance.
// Adjacent entries are linearly interpolated by the model_rd_norm functions.
static const int dist_tab_q10[] = {
  0,    0,    1,    1,    1,    2,    2,    2,    3,    3,    4,    5,    5,
  6,    7,    7,    8,    9,    11,   12,   13,   15,   16,   17,   18,   21,
  24,   26,   29,   31,   34,   36,   39,   44,   49,   54,   59,   64,   69,
  73,   78,   88,   97,   106,  115,  124,  133,  142,  151,  167,  184,  200,
  215,  231,  245,  260,  274,  301,  327,  351,  375,  397,  418,  439,  458,
  495,  528,  559,  587,  613,  637,  659,  680,  717,  749,  777,  801,  823,
  842,  859,  874,  899,  919,  936,  949,  960,  969,  977,  983,  994,  1001,
  1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024,
};
// Sample points for the two tables above: entry i is the x^2 value at which
// entry i of rate_tab_q10 and dist_tab_q10 applies. The model_rd_norm
// functions subtract the selected entry from their input to obtain the
// interpolation weight towards the next entry.
static const int xsq_iq_q10[] = {
  0,      4,      8,      12,     16,     20,     24,     28,     32,
  40,     48,     56,     64,     72,     80,     88,     96,     112,
  128,    144,    160,    176,    192,    208,    224,    256,    288,
  320,    352,    384,    416,    448,    480,    544,    608,    672,
  736,    800,    864,    928,    992,    1120,   1248,   1376,   1504,
  1632,   1760,   1888,   2016,   2272,   2528,   2784,   3040,   3296,
  3552,   3808,   4064,   4576,   5088,   5600,   6112,   6624,   7136,
  7648,   8160,   9184,   10208,  11232,  12256,  13280,  14304,  15328,
  16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,  32736,
  36832,  40928,  45024,  49120,  53216,  57312,  61408,  65504,  73696,
  81888,  90080,  98272,  106464, 114656, 122848, 131040, 147424, 163808,
  180192, 196576, 212960, 229344, 245728,
};
423 
model_rd_norm(int xsq_q10,int * r_q10,int * d_q10)424 static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
425   const int tmp = (xsq_q10 >> 2) + 8;
426   const int k = get_msb(tmp) - 3;
427   const int xq = (k << 3) + ((tmp >> k) & 0x7);
428   const int one_q10 = 1 << 10;
429   const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k);
430   const int b_q10 = one_q10 - a_q10;
431   *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
432   *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
433 }
434 
model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],int r_q10[MAX_MB_PLANE],int d_q10[MAX_MB_PLANE])435 static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE],
436                               int r_q10[MAX_MB_PLANE],
437                               int d_q10[MAX_MB_PLANE]) {
438   int i;
439   const int one_q10 = 1 << 10;
440   for (i = 0; i < MAX_MB_PLANE; ++i) {
441     const int tmp = (xsq_q10[i] >> 2) + 8;
442     const int k = get_msb(tmp) - 3;
443     const int xq = (k << 3) + ((tmp >> k) & 0x7);
444     const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k);
445     const int b_q10 = one_q10 - a_q10;
446     r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10;
447     d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10;
448   }
449 }
450 
// Upper clamp applied to the x^2 value before it is passed to the
// model_rd_norm functions. It is one less than the last entry of xsq_iq_q10.
static const uint32_t MAX_XSQ_Q10 = 245727;
452 
vp9_model_rd_from_var_lapndz(unsigned int var,unsigned int n_log2,unsigned int qstep,int * rate,int64_t * dist)453 void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
454                                   unsigned int qstep, int *rate,
455                                   int64_t *dist) {
456   // This function models the rate and distortion for a Laplacian
457   // source with given variance when quantized with a uniform quantizer
458   // with given stepsize. The closed form expressions are in:
459   // Hang and Chen, "Source Model for transform video coder and its
460   // application - Part I: Fundamental Theory", IEEE Trans. Circ.
461   // Sys. for Video Tech., April 1997.
462   if (var == 0) {
463     *rate = 0;
464     *dist = 0;
465   } else {
466     int d_q10, r_q10;
467     const uint64_t xsq_q10_64 =
468         (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
469     const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
470     model_rd_norm(xsq_q10, &r_q10, &d_q10);
471     *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT);
472     *dist = (var * (int64_t)d_q10 + 512) >> 10;
473   }
474 }
475 
476 // Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where
477 // vectors are of length MAX_MB_PLANE and all elements of var are non-zero.
vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],unsigned int n_log2[MAX_MB_PLANE],unsigned int qstep[MAX_MB_PLANE],int64_t * rate_sum,int64_t * dist_sum)478 void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
479                                       unsigned int n_log2[MAX_MB_PLANE],
480                                       unsigned int qstep[MAX_MB_PLANE],
481                                       int64_t *rate_sum, int64_t *dist_sum) {
482   int i;
483   int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE];
484   for (i = 0; i < MAX_MB_PLANE; ++i) {
485     const uint64_t xsq_q10_64 =
486         (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) /
487         var[i];
488     xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);
489   }
490   model_rd_norm_vec(xsq_q10, r_q10, d_q10);
491   for (i = 0; i < MAX_MB_PLANE; ++i) {
492     int rate =
493         ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT);
494     int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10;
495     *rate_sum += rate;
496     *dist_sum += dist;
497   }
498 }
499 
// Copies the above/left entropy contexts of plane |pd| into |t_above| and
// |t_left| for a block of size |bsize| coded with transform size |tx_size|.
//
// For TX_4X4 the contexts are copied one to one. For larger transforms each
// group of 2, 4 or 8 consecutive contexts is collapsed to a single 0/1 flag
// (1 if any byte of the group is non-zero) written at the group's first
// index; the other indices of the group are left untouched.
//
// The grouped reads go through memcpy() into a fixed-width integer rather
// than through a casted pointer, so they neither break the aliasing rules nor
// depend on the alignment of the context arrays. The bytes read are exactly
// the same as with a direct 16/32/64-bit load.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;

  int i;
  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      break;
    case TX_8X8: {
      uint16_t group;
      for (i = 0; i < num_4x4_w; i += 2) {
        memcpy(&group, &above[i], sizeof(group));
        t_above[i] = !!group;
      }
      for (i = 0; i < num_4x4_h; i += 2) {
        memcpy(&group, &left[i], sizeof(group));
        t_left[i] = !!group;
      }
      break;
    }
    case TX_16X16: {
      uint32_t group;
      for (i = 0; i < num_4x4_w; i += 4) {
        memcpy(&group, &above[i], sizeof(group));
        t_above[i] = !!group;
      }
      for (i = 0; i < num_4x4_h; i += 4) {
        memcpy(&group, &left[i], sizeof(group));
        t_left[i] = !!group;
      }
      break;
    }
    default: {
      uint64_t group;
      assert(tx_size == TX_32X32);
      for (i = 0; i < num_4x4_w; i += 8) {
        memcpy(&group, &above[i], sizeof(group));
        t_above[i] = !!group;
      }
      for (i = 0; i < num_4x4_h; i += 8) {
        memcpy(&group, &left[i], sizeof(group));
        t_left[i] = !!group;
      }
      break;
    }
  }
}
537 
// Evaluates the candidate predicted motion vectors for |ref_frame| (the two
// reference mvs stored in x->mbmi_ext plus, for blocks smaller than
// x->max_partition_size, x->pred_mv[ref_frame]) by computing the SAD between
// the source block and the reference block each candidate points to.
// Results are stored in |x|: the index of the candidate with the lowest SAD,
// that SAD, and the largest candidate component magnitude divided by 8.
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) {
  const int num_mv_refs =
      MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size);
  uint8_t *const src_y_ptr = x->plane[0].src.buf;
  MV candidates[3];
  int nearest_equals_near;
  int saw_zero = 0;
  int best_idx = 0;
  int lowest_sad = INT_MAX;
  int largest_mv = 0;
  int idx;

  candidates[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  candidates[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  candidates[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(candidates) / sizeof(candidates[0])));

  nearest_equals_near = x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
                        x->mbmi_ext->ref_mvs[ref_frame][1].as_int;

  // Get the sad for each candidate reference mv.
  for (idx = 0; idx < num_mv_refs; ++idx) {
    const MV *const mv = &candidates[idx];
    uint8_t *ref_y_ptr;
    int row_fp, col_fp, is_zero, sad;

    // Skip candidates flagged with INT16_MAX and a second candidate that
    // merely duplicates the first.
    if (mv->row == INT16_MAX || mv->col == INT16_MAX) continue;
    if (idx == 1 && nearest_equals_near) continue;

    // Divide each component by 8, rounding to the nearest integer.
    row_fp = (mv->row + 3 + (mv->row >= 0)) >> 3;
    col_fp = (mv->col + 3 + (mv->col >= 0)) >> 3;
    largest_mv = VPXMAX(largest_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);

    // The zero offset only needs to be evaluated once.
    is_zero = (row_fp == 0 && col_fp == 0);
    if (is_zero && saw_zero) continue;
    saw_zero |= is_zero;

    ref_y_ptr = &ref_y_buffer[ref_y_stride * row_fp + col_fp];
    // Find sad for current vector.
    sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride,
                                      ref_y_ptr, ref_y_stride);
    // Note if it is the best so far.
    if (sad < lowest_sad) {
      lowest_sad = sad;
      best_idx = idx;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_idx;
  x->max_mv_context[ref_frame] = largest_mv;
  x->pred_mv_sad[ref_frame] = lowest_sad;
}
590 
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)591 void vp9_setup_pred_block(const MACROBLOCKD *xd,
592                           struct buf_2d dst[MAX_MB_PLANE],
593                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
594                           const struct scale_factors *scale,
595                           const struct scale_factors *scale_uv) {
596   int i;
597 
598   dst[0].buf = src->y_buffer;
599   dst[0].stride = src->y_stride;
600   dst[1].buf = src->u_buffer;
601   dst[2].buf = src->v_buffer;
602   dst[1].stride = dst[2].stride = src->uv_stride;
603 
604   for (i = 0; i < MAX_MB_PLANE; ++i) {
605     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
606                      i ? scale_uv : scale, xd->plane[i].subsampling_x,
607                      xd->plane[i].subsampling_y);
608   }
609 }
610 
// Returns the offset, in elements, of 4x4 block number |raster_block|
// (counted in raster order within a block of size |plane_bsize|) inside a
// buffer whose rows are |stride| elements apart.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int col_mask = (1 << width_log2) - 1;
  const int row = raster_block >> width_log2;
  const int col = raster_block & col_mask;
  // Each block step is 4 elements in either direction.
  return (4 * row) * stride + (4 * col);
}
618 
// Returns a pointer to 4x4 block number |raster_block| inside the int16_t
// buffer |base|, whose row stride is taken to be the width of |plane_bsize|
// (4 elements per 4x4 block column).
int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block,
                                       int16_t *base) {
  const int row_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, row_stride);
  return base + offset;
}
624 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)625 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
626                                              int ref_frame) {
627   const VP9_COMMON *const cm = &cpi->common;
628   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
629   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
630   assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME);
631   return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX)
632              ? &cm->buffer_pool->frame_bufs[scaled_idx].buf
633              : NULL;
634 }
635 
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)636 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
637   const MODE_INFO *const mi = xd->mi[0];
638   const int ctx = get_pred_context_switchable_interp(xd);
639   return SWITCHABLE_INTERP_RATE_FACTOR *
640          cpi->switchable_interp_costs[ctx][mi->interp_filter];
641 }
642 
// Initializes rd->thresh_mult for every prediction mode. Each entry starts
// from a baseline (-500 when the encoder mode is BEST, 0 otherwise). The
// three NEAREST single-reference modes are then set to 300 when adaptive rd
// thresholds are enabled and to 0 otherwise, and the modes listed in the
// table below are raised by a fixed per-mode increment.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  static const struct {
    int mode;
    int increment;
  } thresh_increments[] = {
    { THR_DC, 1000 },

    { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    { THR_NEARMV, 1000 },
    { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 },
    { THR_COMP_NEARESTGA, 1000 },

    { THR_TM, 1000 },

    { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },
    { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },
    { THR_COMP_NEWGA, 2000 },

    { THR_ZEROMV, 2000 },
    { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },
    { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },

    { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },
    { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },
    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },
    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  const int num_increments =
      (int)(sizeof(thresh_increments) / sizeof(thresh_increments[0]));
  RD_OPT *const rd = &cpi->rd;
  const int baseline = cpi->oxcf.mode == BEST ? -500 : 0;
  const int nearest_thresh = cpi->sf.adaptive_rd_thresh ? 300 : 0;
  int i;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i) {
    rd->thresh_mult[i] = baseline;
  }

  // These overwrite the baseline rather than adding to it.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  for (i = 0; i < num_increments; ++i) {
    rd->thresh_mult[thresh_increments[i].mode] +=
        thresh_increments[i].increment;
  }
}
696 
// Loads rd->thresh_mult_sub8x8 with one of two fixed sets of multipliers:
// the second row of the table when the encoder mode is BEST, the first row
// otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int sub8x8_mults[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  const int *const selected =
      (cpi->oxcf.mode == BEST) ? sub8x8_mults[1] : sub8x8_mults[0];
  memcpy(cpi->rd.thresh_mult_sub8x8, selected, sizeof(sub8x8_mults[0]));
}
706 
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)707 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
708                                int bsize, int best_mode_index) {
709   if (rd_thresh > 0) {
710     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
711     int mode;
712     for (mode = 0; mode < top_mode; ++mode) {
713       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
714       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
715       BLOCK_SIZE bs;
716       for (bs = min_size; bs <= max_size; ++bs) {
717         int *const fact = &factor_buf[bs][mode];
718         if (mode == best_mode_index) {
719           *fact -= (*fact >> 4);
720         } else {
721           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
722         }
723       }
724     }
725   }
726 }
727 
// Returns the rate penalty applied to intra modes: 20 times the 8-bit dc
// quantizer for |qindex| and |qdelta|, shifted right by 4 for blocks up to
// 8x8 and by 2 for blocks up to 16x16. The shift is skipped for larger
// blocks and whenever noise estimation is enabled and reports a high level.
int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Don't reduce intra cost penalty if estimated noise level is high.
  const int high_noise =
      cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh;
  int reduction_fac = 0;

  // Reduce the intra cost penalty for small blocks (<=16x16).
  if (!high_noise && bsize <= BLOCK_16X16) {
    reduction_fac = (bsize <= BLOCK_8X8) ? 4 : 2;
  }

  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}
746