1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <assert.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 
17 #include "vpx_dsp/vpx_dsp_common.h"
18 #include "vpx_mem/vpx_mem.h"
19 #include "vpx_ports/bitops.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/system_state.h"
22 
23 #include "vp9/common/vp9_common.h"
24 #include "vp9/common/vp9_entropy.h"
25 #include "vp9/common/vp9_entropymode.h"
26 #include "vp9/common/vp9_mvref_common.h"
27 #include "vp9/common/vp9_pred_common.h"
28 #include "vp9/common/vp9_quant_common.h"
29 #include "vp9/common/vp9_reconinter.h"
30 #include "vp9/common/vp9_reconintra.h"
31 #include "vp9/common/vp9_seg_common.h"
32 
33 #include "vp9/encoder/vp9_cost.h"
34 #include "vp9/encoder/vp9_encodemb.h"
35 #include "vp9/encoder/vp9_encodemv.h"
36 #include "vp9/encoder/vp9_encoder.h"
37 #include "vp9/encoder/vp9_mcomp.h"
38 #include "vp9/encoder/vp9_quantize.h"
39 #include "vp9/encoder/vp9_ratectrl.h"
40 #include "vp9/encoder/vp9_rd.h"
41 #include "vp9/encoder/vp9_tokenize.h"
42 
// Exponent applied to the scaled quantizer when deriving the rd threshold
// factor (see compute_rd_thresh_factor()).
#define RD_THRESH_POW 1.25
// RDMULT is divided by this ratio to obtain the per-bit error cost
// (see vp9_initialize_rd_consts()).
#define RD_MULT_EPB_RATIO 64

// Weight applied to the rate of switchable interpolation filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1
48 
// Resets |rd_cost| by storing the maximum sentinel in every field:
// INT_MAX for the rate and INT64_MAX for the distortion and the rd cost.
void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rdcost = INT64_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rate = INT_MAX;
}
54 
// Zeroes the rate, distortion and rd cost fields of |rd_cost|.
void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rdcost = 0;
  rd_cost->dist = 0;
  rd_cost->rate = 0;
}
60 
61 // The baseline rd thresholds for breaking out of the rd loop for
62 // certain modes are assumed to be based on 8x8 blocks.
63 // This table is used to correct for block size.
64 // The factors here are << 2 (2 = x0.5, 32 = x8 etc).
65 static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = {
66   2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32
67 };
68 
// Rebuilds the mode cost tables held in |cpi| from tree probabilities:
// key-frame y modes, y modes, intra uv modes (for key and inter frames) and
// switchable interpolation filters.
static void fill_mode_costs(VP9_COMP *cpi) {
  const FRAME_CONTEXT *const fc = cpi->common.fc;
  int row, col, ctx;

  // Key-frame y mode costs, one table per pair of INTRA_MODES indices.
  for (row = 0; row < INTRA_MODES; ++row) {
    for (col = 0; col < INTRA_MODES; ++col) {
      vp9_cost_tokens(cpi->y_mode_costs[row][col],
                      vp9_kf_y_mode_prob[row][col], vp9_intra_mode_tree);
    }
  }

  // Y mode costs from the current frame context.
  vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree);

  // Intra uv mode costs; both use the TM_PRED row of their probability table.
  vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME],
                  vp9_kf_uv_mode_prob[TM_PRED], vp9_intra_mode_tree);
  vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME],
                  fc->uv_mode_prob[TM_PRED], vp9_intra_mode_tree);

  // Switchable interpolation filter costs, one table per filter context.
  for (ctx = 0; ctx < SWITCHABLE_FILTER_CONTEXTS; ++ctx) {
    vp9_cost_tokens(cpi->switchable_interp_costs[ctx],
                    fc->switchable_interp_prob[ctx],
                    vp9_switchable_interp_tree);
  }
}
88 
// Fills the coefficient token cost tables |c| from the model probabilities
// |p|. For every (transform size, plane type, ref type, band, context) entry
// the model is expanded to full probabilities and two cost rows are built:
// index 0 with vp9_cost_tokens() and index 1 with vp9_cost_tokens_skip().
static void fill_token_costs(vp9_coeff_cost *c,
                             vp9_coeff_probs_model (*p)[PLANE_TYPES]) {
  TX_SIZE tx_size;
  int plane_type, ref_type, band, ctx;

  for (tx_size = TX_4X4; tx_size <= TX_32X32; ++tx_size) {
    for (plane_type = 0; plane_type < PLANE_TYPES; ++plane_type) {
      for (ref_type = 0; ref_type < REF_TYPES; ++ref_type) {
        for (band = 0; band < COEF_BANDS; ++band) {
          for (ctx = 0; ctx < BAND_COEFF_CONTEXTS(band); ++ctx) {
            vpx_prob full_probs[ENTROPY_NODES];

            vp9_model_to_full_probs(
                p[tx_size][plane_type][ref_type][band][ctx], full_probs);
            vp9_cost_tokens(
                (int *)c[tx_size][plane_type][ref_type][band][0][ctx],
                full_probs, vp9_coef_tree);
            vp9_cost_tokens_skip(
                (int *)c[tx_size][plane_type][ref_type][band][1][ctx],
                full_probs, vp9_coef_tree);
            // Both rows are expected to agree on the EOB token cost.
            assert(c[tx_size][plane_type][ref_type][band][0][ctx][EOB_TOKEN] ==
                   c[tx_size][plane_type][ref_type][band][1][ctx][EOB_TOKEN]);
          }
        }
      }
    }
  }
}
108 
109 // Values are now correlated to quantizer.
110 static int sad_per_bit16lut_8[QINDEX_RANGE];
111 static int sad_per_bit4lut_8[QINDEX_RANGE];
112 
113 #if CONFIG_VP9_HIGHBITDEPTH
114 static int sad_per_bit16lut_10[QINDEX_RANGE];
115 static int sad_per_bit4lut_10[QINDEX_RANGE];
116 static int sad_per_bit16lut_12[QINDEX_RANGE];
117 static int sad_per_bit4lut_12[QINDEX_RANGE];
118 #endif
119 
// Populates the first |range| entries of |bit16lut| and |bit4lut| for the
// given |bit_depth|. Each entry is a linear function of the real quantizer
// value for that qindex, truncated to int.
// The tables are computed by formula (rather than hard-coded) for now, so the
// impact of experimental quantizer table changes is easier to resolve.
static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range,
                            vpx_bit_depth_t bit_depth) {
  int qindex;

  for (qindex = 0; qindex < range; ++qindex) {
    const double q = vp9_convert_qindex_to_q(qindex, bit_depth);
    const double sad16 = 0.0418 * q + 2.4107;
    const double sad4 = 0.063 * q + 2.742;

    bit16lut[qindex] = (int)sad16;
    bit4lut[qindex] = (int)sad4;
  }
}
132 
vp9_init_me_luts(void)133 void vp9_init_me_luts(void) {
134   init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE,
135                   VPX_BITS_8);
136 #if CONFIG_VP9_HIGHBITDEPTH
137   init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE,
138                   VPX_BITS_10);
139   init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE,
140                   VPX_BITS_12);
141 #endif
142 }
143 
144 static const int rd_boost_factor[16] = {
145   64, 32, 32, 32, 24, 16, 12, 12,
146   8, 8, 4, 4, 2, 2, 1, 0
147 };
148 static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = {
149   128, 144, 128, 128, 144
150 };
151 
vp9_compute_rd_mult(const VP9_COMP * cpi,int qindex)152 int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) {
153   const int64_t q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth);
154 #if CONFIG_VP9_HIGHBITDEPTH
155   int64_t rdmult = 0;
156   switch (cpi->common.bit_depth) {
157     case VPX_BITS_8:
158       rdmult = 88 * q * q / 24;
159       break;
160     case VPX_BITS_10:
161       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 4);
162       break;
163     case VPX_BITS_12:
164       rdmult = ROUND_POWER_OF_TWO(88 * q * q / 24, 8);
165       break;
166     default:
167       assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
168       return -1;
169   }
170 #else
171   int64_t rdmult = 88 * q * q / 24;
172 #endif  // CONFIG_VP9_HIGHBITDEPTH
173   if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) {
174     const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
175     const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index];
176     const int boost_index = VPXMIN(15, (cpi->rc.gfu_boost / 100));
177 
178     rdmult = (rdmult * rd_frame_type_factor[frame_type]) >> 7;
179     rdmult += ((rdmult * rd_boost_factor[boost_index]) >> 7);
180   }
181   if (rdmult < 1)
182     rdmult = 1;
183   return (int)rdmult;
184 }
185 
// Derives the rd threshold factor for |qindex|.
// The DC quantizer is divided by 4, 16 or 64 for 8-, 10- and 12-bit depth
// respectively, raised to RD_THRESH_POW, scaled by 5.12 and truncated to int.
// The result is never less than 8. Returns -1 for an unrecognised bit depth
// (high bit depth builds only).
static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) {
  double scaled_q;
  int factor;
#if CONFIG_VP9_HIGHBITDEPTH
  double divisor;

  switch (bit_depth) {
    case VPX_BITS_8:
      divisor = 4.0;
      break;
    case VPX_BITS_10:
      divisor = 16.0;
      break;
    case VPX_BITS_12:
      divisor = 64.0;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return -1;
  }
  scaled_q = vp9_dc_quant(qindex, 0, bit_depth) / divisor;
#else
  (void)bit_depth;
  scaled_q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // TODO(debargha): Adjust the function below.
  factor = (int)(pow(scaled_q, RD_THRESH_POW) * 5.12);
  return VPXMAX(factor, 8);
}
210 
// Loads the SAD-per-bit constants of |x| from the lookup tables that match
// the encoder bit depth, using |qindex| as the table index.
// In high bit depth builds an unrecognised bit depth trips an assert and
// leaves |x| untouched.
void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) {
  // The 8-bit tables are the default and the only choice without high
  // bit depth support.
  const int *lut16 = sad_per_bit16lut_8;
  const int *lut4 = sad_per_bit4lut_8;

#if CONFIG_VP9_HIGHBITDEPTH
  switch (cpi->common.bit_depth) {
    case VPX_BITS_8:
      break;
    case VPX_BITS_10:
      lut16 = sad_per_bit16lut_10;
      lut4 = sad_per_bit4lut_10;
      break;
    case VPX_BITS_12:
      lut16 = sad_per_bit16lut_12;
      lut4 = sad_per_bit4lut_12;
      break;
    default:
      assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
      return;
  }
#else
  (void)cpi;
#endif  // CONFIG_VP9_HIGHBITDEPTH

  x->sadperbit16 = lut16[qindex];
  x->sadperbit4 = lut4[qindex];
}
235 
// Fills rd->threshes[segment][block size][mode] for every segment and block
// size. Each threshold is the corresponding multiplier scaled by the segment's
// rd threshold factor and the block-size factor, divided by 4, and saturated
// to INT_MAX when the product would not fit in an int.
// Block sizes of 8x8 and above use rd->thresh_mult (MAX_MODES entries);
// smaller block sizes use rd->thresh_mult_sub8x8 (MAX_REFS entries).
static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) {
  int seg;

  for (seg = 0; seg < MAX_SEGMENTS; ++seg) {
    const int seg_qindex = vp9_get_qindex(&cm->seg, seg, cm->base_qindex);
    const int qindex = clamp(seg_qindex + cm->y_dc_delta_q, 0, MAXQ);
    const int q_factor = compute_rd_thresh_factor(qindex, cm->bit_depth);
    int bs;

    for (bs = 0; bs < BLOCK_SIZES; ++bs) {
      // Threshold here seems unnecessarily harsh but fine given actual
      // range of values used for cpi->sf.thresh_mult[].
      const int scale = q_factor * rd_thresh_block_size_factor[bs];
      const int mult_limit = INT_MAX / scale;
      const int is_sub8x8 = bs < BLOCK_8X8;
      const int *const mults =
          is_sub8x8 ? rd->thresh_mult_sub8x8 : rd->thresh_mult;
      const int count = is_sub8x8 ? MAX_REFS : MAX_MODES;
      int *const out = rd->threshes[seg][bs];
      int m;

      for (m = 0; m < count; ++m) {
        if (mults[m] < mult_limit)
          out[m] = mults[m] * scale / 4;
        else
          out[m] = INT_MAX;
      }
    }
  }
}
267 
// Sets up the per-frame rate-distortion constants: RDDIV/RDMULT, the error
// per bit, the tx size selection flag, block thresholds and partition
// probabilities, followed by whichever token, partition, mode, motion vector
// and inter mode cost tables the current speed features and frame type need.
void vp9_initialize_rd_consts(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_OPT *const rd = &cpi->rd;
  const int is_key_frame = cm->frame_type == KEY_FRAME;
  int ctx;

  vpx_clear_system_state();

  rd->RDDIV = RDDIV_BITS;  // In bits (to multiply D by 128).
  rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q);

  // Error per bit is derived from RDMULT and must be at least 1.
  x->errorperbit = rd->RDMULT / RD_MULT_EPB_RATIO;
  if (x->errorperbit == 0)
    x->errorperbit = 1;

  // The flag is cleared only for USE_LARGESTALL on non-key frames.
  x->select_tx_size =
      !(cpi->sf.tx_size_search_method == USE_LARGESTALL && !is_key_frame);

  set_block_thresholds(cm, rd);
  set_partition_probs(cm, xd);

  if (is_key_frame || !cpi->sf.use_nonrd_pick_mode)
    fill_token_costs(x->token_costs, cm->fc->coef_probs);

  if (is_key_frame || cpi->sf.partition_search_type != VAR_BASED_PARTITION) {
    for (ctx = 0; ctx < PARTITION_CONTEXTS; ++ctx) {
      vp9_cost_tokens(cpi->partition_cost[ctx], get_partition_probs(xd, ctx),
                      vp9_partition_tree);
    }
  }

  // In non-rd pick mode the remaining tables are refreshed only on key frames
  // and on frames whose index is 1 modulo 8.
  if (cpi->sf.use_nonrd_pick_mode && !is_key_frame &&
      (cm->current_video_frame & 0x07) != 1) {
    return;
  }

  fill_mode_costs(cpi);

  if (frame_is_intra_only(cm))
    return;

  vp9_build_nmv_cost_table(
      x->nmvjointcost,
      cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost,
      &cm->fc->nmvc, cm->allow_high_precision_mv);

  for (ctx = 0; ctx < INTER_MODE_CONTEXTS; ++ctx) {
    vp9_cost_tokens((int *)cpi->inter_mode_cost[ctx],
                    cm->fc->inter_mode_probs[ctx], vp9_inter_mode_tree);
  }
}
315 
// Looks up the normalized rate (*r_q10) and normalized distortion (*d_q10),
// both in Q10, for the Q10 value |xsq_q10|. The value is mapped to a table
// segment and the two neighbouring table entries are linearly interpolated.
static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) {
  // NOTE: The tables below must be of the same size.

  // The functions described below are sampled at the four most significant
  // bits of x^2 + 8 / 256.

  // Normalized rate:
  // This table models the rate for a Laplacian source with given variance
  // when quantized with a uniform quantizer with given stepsize. The
  // closed form expression is:
  // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)],
  // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance),
  // and H(x) is the binary entropy function.
  static const int rate_tab_q10[] = {
    65536,  6086,  5574,  5275,  5063,  4899,  4764,  4651,
     4553,  4389,  4255,  4142,  4044,  3958,  3881,  3811,
     3748,  3635,  3538,  3453,  3376,  3307,  3244,  3186,
     3133,  3037,  2952,  2877,  2809,  2747,  2690,  2638,
     2589,  2501,  2423,  2353,  2290,  2232,  2179,  2130,
     2084,  2001,  1928,  1862,  1802,  1748,  1698,  1651,
     1608,  1530,  1460,  1398,  1342,  1290,  1243,  1199,
     1159,  1086,  1021,   963,   911,   864,   821,   781,
      745,   680,   623,   574,   530,   490,   455,   424,
      395,   345,   304,   269,   239,   213,   190,   171,
      154,   126,   104,    87,    73,    61,    52,    44,
       38,    28,    21,    16,    12,    10,     8,     6,
        5,     3,     2,     1,     1,     1,     0,     0,
  };
  // Normalized distortion:
  // This table models the normalized distortion for a Laplacian source
  // with given variance when quantized with a uniform quantizer
  // with given stepsize. The closed form expression is:
  // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2))
  // where x = qpstep / sqrt(variance).
  // Note the actual distortion is Dn * variance.
  static const int dist_tab_q10[] = {
       0,     0,     1,     1,     1,     2,     2,     2,
       3,     3,     4,     5,     5,     6,     7,     7,
       8,     9,    11,    12,    13,    15,    16,    17,
      18,    21,    24,    26,    29,    31,    34,    36,
      39,    44,    49,    54,    59,    64,    69,    73,
      78,    88,    97,   106,   115,   124,   133,   142,
     151,   167,   184,   200,   215,   231,   245,   260,
     274,   301,   327,   351,   375,   397,   418,   439,
     458,   495,   528,   559,   587,   613,   637,   659,
     680,   717,   749,   777,   801,   823,   842,   859,
     874,   899,   919,   936,   949,   960,   969,   977,
     983,   994,  1001,  1006,  1010,  1013,  1015,  1017,
    1018,  1020,  1022,  1022,  1023,  1023,  1023,  1024,
  };
  // Q10 sample points at which the two tables above are evaluated.
  static const int xsq_iq_q10[] = {
         0,      4,      8,     12,     16,     20,     24,     28,
        32,     40,     48,     56,     64,     72,     80,     88,
        96,    112,    128,    144,    160,    176,    192,    208,
       224,    256,    288,    320,    352,    384,    416,    448,
       480,    544,    608,    672,    736,    800,    864,    928,
       992,   1120,   1248,   1376,   1504,   1632,   1760,   1888,
      2016,   2272,   2528,   2784,   3040,   3296,   3552,   3808,
      4064,   4576,   5088,   5600,   6112,   6624,   7136,   7648,
      8160,   9184,  10208,  11232,  12256,  13280,  14304,  15328,
     16352,  18400,  20448,  22496,  24544,  26592,  28640,  30688,
     32736,  36832,  40928,  45024,  49120,  53216,  57312,  61408,
     65504,  73696,  81888,  90080,  98272, 106464, 114656, 122848,
    131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728,
  };
  // Segment index: the exponent selects a group of eight entries and the next
  // three bits select the entry within that group.
  const int biased = (xsq_q10 >> 2) + 8;
  const int exponent = get_msb(biased) - 3;
  const int idx = (exponent << 3) + ((biased >> exponent) & 0x7);
  // Q10 interpolation weights for entry idx + 1 (hi) and entry idx (lo).
  const int w_hi_q10 =
      ((xsq_q10 - xsq_iq_q10[idx]) << 10) >> (2 + exponent);
  const int w_lo_q10 = (1 << 10) - w_hi_q10;
  const int rate_acc =
      rate_tab_q10[idx] * w_lo_q10 + rate_tab_q10[idx + 1] * w_hi_q10;
  const int dist_acc =
      dist_tab_q10[idx] * w_lo_q10 + dist_tab_q10[idx + 1] * w_hi_q10;

  *r_q10 = rate_acc >> 10;
  *d_q10 = dist_acc >> 10;
}
390 
// Models the rate and distortion for a Laplacian source with variance |var|
// when quantized with a uniform quantizer of stepsize |qstep|; |n_log2| is
// used as a shift amount on both the normalized input and the resulting rate.
// The closed form expressions are in:
// Hang and Chen, "Source Model for transform video coder and its
// application - Part I: Fundamental Theory", IEEE Trans. Circ.
// Sys. for Video Tech., April 1997.
// A zero variance yields a zero rate and a zero distortion.
void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist) {
  static const uint32_t MAX_XSQ_Q10 = 245727;
  uint64_t xsq_q10_64;
  int xsq_q10;
  int norm_rate_q10, norm_dist_q10;

  if (var == 0) {
    *rate = 0;
    *dist = 0;
    return;
  }

  // (qstep^2 << (n_log2 + 10)) / var, rounded to nearest and clamped to
  // MAX_XSQ_Q10 before the table lookup.
  xsq_q10_64 =
      (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var;
  xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10);

  model_rd_norm(xsq_q10, &norm_rate_q10, &norm_dist_q10);

  *rate = ((norm_rate_q10 << n_log2) + 2) >> 2;
  *dist = (var * (int64_t)norm_dist_q10 + 512) >> 10;
}
414 
// Returns 1 if any of the |bytes| entropy context bytes starting at |ctx| is
// non-zero and 0 otherwise; |bytes| must not exceed sizeof(uint64_t).
// The bytes are gathered with memcpy() rather than by casting |ctx| to a
// wider integer pointer, which would break the effective-type (strict
// aliasing) rule and could perform a misaligned load.
static int entropy_ctx_any_set(const ENTROPY_CONTEXT *ctx, size_t bytes) {
  uint64_t packed = 0;
  memcpy(&packed, ctx, bytes);
  return packed != 0;
}

// Builds the above/left entropy contexts for a block at transform-size
// granularity.
//   bsize    block size; converted to the plane block size via |pd|.
//   tx_size  transform size that determines how many 4x4 contexts are merged.
//   pd       plane whose above_context / left_context arrays are read.
//   t_above  output contexts along the top edge.
//   t_left   output contexts along the left edge.
// For TX_4X4 the contexts are copied unchanged. For larger transforms each
// group of 2, 4 or 8 consecutive contexts is collapsed to 0 or 1 ("any
// non-zero") and stored at the index of the group's first entry; the other
// entries of the outputs are not written.
void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd);
  const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const ENTROPY_CONTEXT *const above = pd->above_context;
  const ENTROPY_CONTEXT *const left = pd->left_context;
  int group;  // Number of 4x4 contexts merged into one output entry.
  int i;

  switch (tx_size) {
    case TX_4X4:
      memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w);
      memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h);
      return;
    case TX_8X8:
      group = 2;
      break;
    case TX_16X16:
      group = 4;
      break;
    case TX_32X32:
      group = 8;
      break;
    default:
      assert(0 && "Invalid transform size.");
      return;
  }

  // A full group is always read, exactly as the wider loads did before.
  for (i = 0; i < num_4x4_w; i += group)
    t_above[i] = entropy_ctx_any_set(&above[i], (size_t)group);
  for (i = 0; i < num_4x4_h; i += group)
    t_left[i] = entropy_ctx_any_set(&left[i], (size_t)group);
}
454 
// Evaluates the candidate reference motion vectors for |ref_frame| and
// records, in |x|, the index of the candidate with the lowest SAD, that SAD,
// and the largest full-pel vector magnitude seen.
// Candidates are the two entries of x->mbmi_ext->ref_mvs[ref_frame] plus,
// when adaptive motion search is on and |block_size| is below the maximum
// partition size, x->pred_mv[ref_frame].
void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x,
                 uint8_t *ref_y_buffer, int ref_y_stride,
                 int ref_frame, BLOCK_SIZE block_size) {
  uint8_t *const src_y = x->plane[0].src.buf;
  const int src_stride = x->plane[0].src.stride;
  const int use_pred_mv = cpi->sf.adaptive_motion_search &&
                          block_size < x->max_partition_size;
  const int num_mv_refs = MAX_MV_REF_CANDIDATES + use_pred_mv;
  const int near_same_nearest =
      x->mbmi_ext->ref_mvs[ref_frame][0].as_int ==
          x->mbmi_ext->ref_mvs[ref_frame][1].as_int;
  MV candidates[3];
  int best_index = 0;
  int best_sad = INT_MAX;
  int max_mv = 0;
  int zero_seen = 0;
  int idx;

  candidates[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv;
  candidates[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv;
  candidates[2] = x->pred_mv[ref_frame];
  assert(num_mv_refs <= (int)(sizeof(candidates) / sizeof(candidates[0])));

  // Get the sad for each candidate reference mv.
  for (idx = 0; idx < num_mv_refs; ++idx) {
    const MV *const mv = &candidates[idx];
    int fp_row, fp_col, is_zero, sad;
    uint8_t *ref_y;

    // The second candidate is skipped when it duplicates the first.
    if (idx == 1 && near_same_nearest)
      continue;

    // Round the vector to full-pel units.
    fp_row = (mv->row + 3 + (mv->row >= 0)) >> 3;
    fp_col = (mv->col + 3 + (mv->col >= 0)) >> 3;
    max_mv = VPXMAX(max_mv, VPXMAX(abs(mv->row), abs(mv->col)) >> 3);

    // The zero full-pel vector is evaluated at most once.
    is_zero = (fp_row == 0 && fp_col == 0);
    if (is_zero && zero_seen)
      continue;
    zero_seen |= is_zero;

    // Find sad for current vector.
    ref_y = &ref_y_buffer[ref_y_stride * fp_row + fp_col];
    sad = cpi->fn_ptr[block_size].sdf(src_y, src_stride, ref_y, ref_y_stride);

    // Note if it is the best so far.
    if (sad < best_sad) {
      best_sad = sad;
      best_index = idx;
    }
  }

  // Note the index of the mv that worked best in the reference list.
  x->mv_best_ref_index[ref_frame] = best_index;
  x->max_mv_context[ref_frame] = max_mv;
  x->pred_mv_sad[ref_frame] = best_sad;
}
511 
vp9_setup_pred_block(const MACROBLOCKD * xd,struct buf_2d dst[MAX_MB_PLANE],const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * scale,const struct scale_factors * scale_uv)512 void vp9_setup_pred_block(const MACROBLOCKD *xd,
513                           struct buf_2d dst[MAX_MB_PLANE],
514                           const YV12_BUFFER_CONFIG *src,
515                           int mi_row, int mi_col,
516                           const struct scale_factors *scale,
517                           const struct scale_factors *scale_uv) {
518   int i;
519 
520   dst[0].buf = src->y_buffer;
521   dst[0].stride = src->y_stride;
522   dst[1].buf = src->u_buffer;
523   dst[2].buf = src->v_buffer;
524   dst[1].stride = dst[2].stride = src->uv_stride;
525 
526   for (i = 0; i < MAX_MB_PLANE; ++i) {
527     setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col,
528                      i ? scale_uv : scale,
529                      xd->plane[i].subsampling_x, xd->plane[i].subsampling_y);
530   }
531 }
532 
// Converts a raster-order 4x4 block index within a block of size
// |plane_bsize| into a sample offset for a buffer with the given |stride|.
int vp9_raster_block_offset(BLOCK_SIZE plane_bsize,
                            int raster_block, int stride) {
  const int width_log2 = b_width_log2_lookup[plane_bsize];
  const int block_row = raster_block >> width_log2;
  const int block_col = raster_block & ((1 << width_log2) - 1);
  const int y = 4 * block_row;
  const int x = 4 * block_col;

  return y * stride + x;
}
540 
// Returns a pointer into the int16_t buffer |base| for the 4x4 block
// |raster_block|, taking the buffer stride to be the width of |plane_bsize|
// in samples (four times its width in 4x4 blocks).
int16_t* vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                       int raster_block, int16_t *base) {
  const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize];
  const int offset =
      vp9_raster_block_offset(plane_bsize, raster_block, 4 * blocks_wide);

  return base + offset;
}
546 
vp9_get_scaled_ref_frame(const VP9_COMP * cpi,int ref_frame)547 YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi,
548                                              int ref_frame) {
549   const VP9_COMMON *const cm = &cpi->common;
550   const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1];
551   const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame);
552   return
553       (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ?
554           &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL;
555 }
556 
vp9_get_switchable_rate(const VP9_COMP * cpi,const MACROBLOCKD * const xd)557 int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) {
558   const MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
559   const int ctx = vp9_get_pred_context_switchable_interp(xd);
560   return SWITCHABLE_INTERP_RATE_FACTOR *
561              cpi->switchable_interp_costs[ctx][mbmi->interp_filter];
562 }
563 
// Initializes rd->thresh_mult[] for every mode.
// All entries start from a baseline (-500 in BEST mode, 0 otherwise). The
// three NEAREST single-reference modes are then set outright (300 with
// adaptive rd thresholds, 0 without), and every mode in the table below has
// its increment added on top of the baseline.
void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) {
  // Each mode appears at most once, so the order of the rows is irrelevant.
  static const struct {
    int mode;
    int increment;
  } thresh_increments[] = {
    { THR_DC, 1000 },

    { THR_NEWMV, 1000 },
    { THR_NEWA, 1000 },
    { THR_NEWG, 1000 },

    { THR_NEARMV, 1000 },
    { THR_NEARA, 1000 },
    { THR_COMP_NEARESTLA, 1000 },
    { THR_COMP_NEARESTGA, 1000 },

    { THR_TM, 1000 },

    { THR_COMP_NEARLA, 1500 },
    { THR_COMP_NEWLA, 2000 },
    { THR_NEARG, 1000 },
    { THR_COMP_NEARGA, 1500 },
    { THR_COMP_NEWGA, 2000 },

    { THR_ZEROMV, 2000 },
    { THR_ZEROG, 2000 },
    { THR_ZEROA, 2000 },
    { THR_COMP_ZEROLA, 2500 },
    { THR_COMP_ZEROGA, 2500 },

    { THR_H_PRED, 2000 },
    { THR_V_PRED, 2000 },
    { THR_D45_PRED, 2500 },
    { THR_D135_PRED, 2500 },
    { THR_D117_PRED, 2500 },
    { THR_D153_PRED, 2500 },
    { THR_D207_PRED, 2500 },
    { THR_D63_PRED, 2500 },
  };
  const int num_increments =
      (int)(sizeof(thresh_increments) / sizeof(thresh_increments[0]));
  RD_OPT *const rd = &cpi->rd;
  const SPEED_FEATURES *const sf = &cpi->sf;
  const int baseline = (cpi->oxcf.mode == BEST) ? -500 : 0;
  const int nearest_thresh = sf->adaptive_rd_thresh ? 300 : 0;
  int i;

  // Set baseline threshold values.
  for (i = 0; i < MAX_MODES; ++i)
    rd->thresh_mult[i] = baseline;

  // These override the baseline instead of adding to it.
  rd->thresh_mult[THR_NEARESTMV] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTG] = nearest_thresh;
  rd->thresh_mult[THR_NEARESTA] = nearest_thresh;

  for (i = 0; i < num_increments; ++i)
    rd->thresh_mult[thresh_increments[i].mode] += thresh_increments[i].increment;
}
617 
// Loads rd->thresh_mult_sub8x8[] with one of two fixed threshold sets:
// the second row when the encoder mode is BEST, the first row otherwise.
void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int *const selected = thresh_mult[cpi->oxcf.mode == BEST];
  int ref;

  for (ref = 0; ref < MAX_REFS; ++ref)
    rd->thresh_mult_sub8x8[ref] = selected[ref];
}
626 
vp9_update_rd_thresh_fact(int (* factor_buf)[MAX_MODES],int rd_thresh,int bsize,int best_mode_index)627 void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
628                                int bsize, int best_mode_index) {
629   if (rd_thresh > 0) {
630     const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
631     int mode;
632     for (mode = 0; mode < top_mode; ++mode) {
633       const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
634       const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
635       BLOCK_SIZE bs;
636       for (bs = min_size; bs <= max_size; ++bs) {
637         int *const fact = &factor_buf[bs][mode];
638         if (mode == best_mode_index) {
639           *fact -= (*fact >> 4);
640         } else {
641           *fact = VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
642         }
643       }
644     }
645   }
646 }
647 
// Returns the intra cost penalty derived from the DC quantizer for
// |qindex| + |qdelta|: 20 * q for 8-bit content and, in high bit depth
// builds, 5 * q for 10-bit and 5 * q rounded down by 2 bits for 12-bit.
// Returns -1 for an unrecognised bit depth (high bit depth builds only).
int vp9_get_intra_cost_penalty(int qindex, int qdelta,
                               vpx_bit_depth_t bit_depth) {
  const int q = vp9_dc_quant(qindex, qdelta, bit_depth);
#if CONFIG_VP9_HIGHBITDEPTH
  if (bit_depth == VPX_BITS_8) {
    return 20 * q;
  } else if (bit_depth == VPX_BITS_10) {
    return 5 * q;
  } else if (bit_depth == VPX_BITS_12) {
    return ROUND_POWER_OF_TWO(5 * q, 2);
  }
  assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12");
  return -1;
#else
  return 20 * q;
#endif  // CONFIG_VP9_HIGHBITDEPTH
}
667 
668