1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <math.h>
14 #include <stdio.h>
15 
16 #include "config/aom_dsp_rtcd.h"
17 #include "config/aom_scale_rtcd.h"
18 
19 #include "aom_dsp/aom_dsp_common.h"
20 #include "aom_mem/aom_mem.h"
21 #include "aom_ports/mem.h"
22 #include "aom_ports/system_state.h"
23 #include "aom_scale/aom_scale.h"
24 #include "aom_scale/yv12config.h"
25 
26 #include "aom_dsp/variance.h"
27 #include "av1/common/entropymv.h"
28 #include "av1/common/quant_common.h"
29 #include "av1/common/reconinter.h"  // av1_setup_dst_planes()
30 #include "av1/common/txb_common.h"
31 #include "av1/encoder/aq_variance.h"
32 #include "av1/encoder/av1_quantize.h"
33 #include "av1/encoder/block.h"
34 #include "av1/encoder/dwt.h"
35 #include "av1/encoder/encodeframe.h"
36 #include "av1/encoder/encodemb.h"
37 #include "av1/encoder/encodemv.h"
38 #include "av1/encoder/encoder.h"
39 #include "av1/encoder/encode_strategy.h"
40 #include "av1/encoder/extend.h"
41 #include "av1/encoder/firstpass.h"
42 #include "av1/encoder/mcomp.h"
43 #include "av1/encoder/rd.h"
44 #include "av1/encoder/reconinter_enc.h"
45 
46 #define OUTPUT_FPF 0
47 
48 #define FIRST_PASS_Q 10.0
49 #define INTRA_MODE_PENALTY 1024
50 #define NEW_MV_MODE_PENALTY 32
51 #define DARK_THRESH 64
52 
53 #define NCOUNT_INTRA_THRESH 8192
54 #define NCOUNT_INTRA_FACTOR 3
55 
// Queues one first-pass statistics record on |pktlist| as an
// AOM_CODEC_STATS_PKT.
//
// stats:   record to emit. The packet's buffer field is set to this pointer
//          and its size to sizeof(FIRSTPASS_STATS); this function does not
//          copy the record itself.
// pktlist: output packet list the new packet is added to.
//
// When OUTPUT_FPF is non-zero the record is additionally appended as one text
// line to "firstpass.stt" for debugging.
static void output_stats(FIRSTPASS_STATS *stats,
                         struct aom_codec_pkt_list *pktlist) {
  struct aom_codec_cx_pkt pkt;
  pkt.kind = AOM_CODEC_STATS_PKT;
  pkt.data.twopass_stats.buf = stats;
  pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS);
  aom_codec_pkt_list_add(pktlist, &pkt);

// TEMP debug code
#if OUTPUT_FPF
  {
    FILE *fpfile = fopen("firstpass.stt", "a");

    // The dump is best effort: if the file cannot be opened, skip it instead
    // of handing a NULL stream to fprintf()/fclose().
    if (fpfile != NULL) {
      // Each format segment ends with a space so that the last column of one
      // segment never runs into the first column of the next.
      fprintf(fpfile,
              "%12.0lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf "
              "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf "
              "%12.4lf %12.4lf %12.0lf %12.0lf %12.0lf %12.4lf %12.4lf\n",
              stats->frame, stats->weight, stats->intra_error,
              stats->coded_error, stats->sr_coded_error, stats->pcnt_inter,
              stats->pcnt_motion, stats->pcnt_second_ref, stats->pcnt_neutral,
              stats->intra_skip_pct, stats->inactive_zone_rows,
              stats->inactive_zone_cols, stats->MVr, stats->mvr_abs,
              stats->MVc, stats->mvc_abs, stats->MVrv, stats->MVcv,
              stats->mv_in_out_count, stats->new_mv_count, stats->count,
              stats->duration);
      fclose(fpfile);
    }
  }
#endif
}
85 
// Resets a FIRSTPASS_STATS record to its "empty" state.
//
// Every statistic is cleared to 0.0 except |duration|, which is set to 1.0.
// raw_error_stdev is cleared as well so that a record that is only ever
// initialised here (such as the running total) never exposes an unset field
// when it is later emitted.
//
// section: record to reset; must not be NULL.
void av1_twopass_zero_stats(FIRSTPASS_STATS *section) {
  section->frame = 0.0;
  section->weight = 0.0;
  section->intra_error = 0.0;
  section->frame_avg_wavelet_energy = 0.0;
  section->coded_error = 0.0;
  section->sr_coded_error = 0.0;
  section->pcnt_inter = 0.0;
  section->pcnt_motion = 0.0;
  section->pcnt_second_ref = 0.0;
  section->pcnt_neutral = 0.0;
  section->intra_skip_pct = 0.0;
  section->inactive_zone_rows = 0.0;
  section->inactive_zone_cols = 0.0;
  section->MVr = 0.0;
  section->mvr_abs = 0.0;
  section->MVc = 0.0;
  section->mvc_abs = 0.0;
  section->MVrv = 0.0;
  section->MVcv = 0.0;
  section->mv_in_out_count = 0.0;
  section->new_mv_count = 0.0;
  section->count = 0.0;
  section->duration = 1.0;
  section->raw_error_stdev = 0.0;
}
111 
// Adds the statistics of a single frame record into a running total.
//
// section: accumulator that is updated in place.
// frame:   per-frame record whose fields are added; left unmodified.
static void accumulate_stats(FIRSTPASS_STATS *section,
                             const FIRSTPASS_STATS *frame) {
  FIRSTPASS_STATS *const total = section;
  const FIRSTPASS_STATS *const cur = frame;

  // Frame bookkeeping and error scores.
  total->frame += cur->frame;
  total->weight += cur->weight;
  total->intra_error += cur->intra_error;
  total->frame_avg_wavelet_energy += cur->frame_avg_wavelet_energy;
  total->coded_error += cur->coded_error;
  total->sr_coded_error += cur->sr_coded_error;

  // Block classification percentages and inactive zone sizes.
  total->pcnt_inter += cur->pcnt_inter;
  total->pcnt_motion += cur->pcnt_motion;
  total->pcnt_second_ref += cur->pcnt_second_ref;
  total->pcnt_neutral += cur->pcnt_neutral;
  total->intra_skip_pct += cur->intra_skip_pct;
  total->inactive_zone_rows += cur->inactive_zone_rows;
  total->inactive_zone_cols += cur->inactive_zone_cols;

  // Motion vector statistics.
  total->MVr += cur->MVr;
  total->mvr_abs += cur->mvr_abs;
  total->MVc += cur->MVc;
  total->mvc_abs += cur->mvc_abs;
  total->MVrv += cur->MVrv;
  total->MVcv += cur->MVcv;
  total->mv_in_out_count += cur->mv_in_out_count;
  total->new_mv_count += cur->new_mv_count;

  // Frame count and total duration.
  total->count += cur->count;
  total->duration += cur->duration;
}
138 
// Prepares the encoder for a first pass by resetting the running total of
// first-pass statistics kept in cpi->twopass.
void av1_init_first_pass(AV1_COMP *cpi) {
  FIRSTPASS_STATS *const totals = &cpi->twopass.total_stats;
  av1_twopass_zero_stats(totals);
}
142 
// Finishes the first pass by emitting the accumulated total statistics as a
// stats packet on the encoder's output packet list.
void av1_end_first_pass(AV1_COMP *cpi) {
  FIRSTPASS_STATS *const totals = &cpi->twopass.total_stats;
  output_stats(totals, cpi->output_pkt_list);
}
146 
get_block_variance_fn(BLOCK_SIZE bsize)147 static aom_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) {
148   switch (bsize) {
149     case BLOCK_8X8: return aom_mse8x8;
150     case BLOCK_16X8: return aom_mse16x8;
151     case BLOCK_8X16: return aom_mse8x16;
152     default: return aom_mse16x16;
153   }
154 }
155 
// Computes the sum of squared errors between the source block |src| and the
// reference block |ref| using the MSE function selected for |bsize|.
// Returns the SSE value written by that function.
static unsigned int get_prediction_error(BLOCK_SIZE bsize,
                                         const struct buf_2d *src,
                                         const struct buf_2d *ref) {
  const aom_variance_fn_t mse_fn = get_block_variance_fn(bsize);
  unsigned int block_sse;

  // The function's own return value is ignored; only the SSE output is used.
  mse_fn(src->buf, src->stride, ref->buf, ref->stride, &block_sse);
  return block_sse;
}
164 
highbd_get_block_variance_fn(BLOCK_SIZE bsize,int bd)165 static aom_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize,
166                                                       int bd) {
167   switch (bd) {
168     default:
169       switch (bsize) {
170         case BLOCK_8X8: return aom_highbd_8_mse8x8;
171         case BLOCK_16X8: return aom_highbd_8_mse16x8;
172         case BLOCK_8X16: return aom_highbd_8_mse8x16;
173         default: return aom_highbd_8_mse16x16;
174       }
175       break;
176     case 10:
177       switch (bsize) {
178         case BLOCK_8X8: return aom_highbd_10_mse8x8;
179         case BLOCK_16X8: return aom_highbd_10_mse16x8;
180         case BLOCK_8X16: return aom_highbd_10_mse8x16;
181         default: return aom_highbd_10_mse16x16;
182       }
183       break;
184     case 12:
185       switch (bsize) {
186         case BLOCK_8X8: return aom_highbd_12_mse8x8;
187         case BLOCK_16X8: return aom_highbd_12_mse16x8;
188         case BLOCK_8X16: return aom_highbd_12_mse8x16;
189         default: return aom_highbd_12_mse16x16;
190       }
191       break;
192   }
193 }
194 
// High-bit-depth counterpart of get_prediction_error(): computes the sum of
// squared errors between |src| and |ref| using the MSE function selected for
// |bsize| and bit depth |bd|. Returns the SSE value written by that function.
static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize,
                                                const struct buf_2d *src,
                                                const struct buf_2d *ref,
                                                int bd) {
  const aom_variance_fn_t mse_fn = highbd_get_block_variance_fn(bsize, bd);
  unsigned int block_sse;

  // The function's own return value is ignored; only the SSE output is used.
  mse_fn(src->buf, src->stride, ref->buf, ref->stride, &block_sse);
  return block_sse;
}
204 
205 // Refine the motion search range according to the frame dimension
206 // for first pass test.
get_search_range(const AV1_COMP * cpi)207 static int get_search_range(const AV1_COMP *cpi) {
208   int sr = 0;
209   const int dim = AOMMIN(cpi->initial_width, cpi->initial_height);
210 
211   while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr;
212   return sr;
213 }
214 
// Runs the first-pass diamond motion search for the current block.
//
// cpi:             encoder instance supplying the search function, search
//                  site config and per-block-size function pointers.
// x:               macroblock whose current block is searched.
// ref_mv:          search centre; its components are shifted right by 3 to
//                  form the full-pel start position.
// best_mv:         updated with the candidate MV whenever a candidate beats
//                  *best_motion_err.
// best_motion_err: in/out best error so far; only ever lowered.
//
// Each candidate is scored with av1_get_mvpred_var() using an MSE function in
// place of the default variance function, plus NEW_MV_MODE_PENALTY.
static void first_pass_motion_search(AV1_COMP *cpi, MACROBLOCK *x,
                                     const MV *ref_mv, MV *best_mv,
                                     int *best_motion_err) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
  const int mv_penalty = NEW_MV_MODE_PENALTY;

  // Widen the first step and shorten the refinement for small frames.
  const int range_shift = get_search_range(cpi);
  const int first_step = 3 + range_shift;
  const int refine_steps = ((MAX_MVSEARCH_STEPS - 1) - 3) - range_shift;

  MV center_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
  MV cand_mv = kZeroMv;
  int skip_steps;
  int cand_err;
  int step;

  // Use MSE rather than the default variance function.
  aom_variance_fn_ptr_t fn_ptr = cpi->fn_ptr[bsize];
  if (is_cur_buf_hbd(xd)) {
    fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd);
  } else {
    fn_ptr.vf = get_block_variance_fn(bsize);
  }

  // Initial step/diamond search centred on the reference mv.
  cand_err = cpi->diamond_search_sad(x, &cpi->ss_cfg[SS_CFG_SRC], &center_full,
                                     &cand_mv, first_step, x->sadperbit16,
                                     &skip_steps, &fn_ptr, ref_mv);
  if (cand_err < INT_MAX) {
    cand_err = av1_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
  }
  if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;

  if (cand_err < *best_motion_err) {
    *best_motion_err = cand_err;
    *best_mv = cand_mv;
  }

  // Further step/diamond searches as necessary. The steps reported as
  // redundant by the initial search are skipped up front.
  step = skip_steps;
  skip_steps = 0;

  for (; step < refine_steps;) {
    ++step;

    if (skip_steps) {
      // The previous search flagged this step as redundant.
      --skip_steps;
      continue;
    }

    cand_err = cpi->diamond_search_sad(
        x, &cpi->ss_cfg[SS_CFG_SRC], &center_full, &cand_mv, first_step + step,
        x->sadperbit16, &skip_steps, &fn_ptr, ref_mv);
    if (cand_err < INT_MAX) {
      cand_err = av1_get_mvpred_var(x, &cand_mv, ref_mv, &fn_ptr, 1);
    }
    if (cand_err < INT_MAX - mv_penalty) cand_err += mv_penalty;

    if (cand_err < *best_motion_err) {
      *best_motion_err = cand_err;
      *best_mv = cand_mv;
    }
  }
}
276 
get_bsize(const AV1_COMMON * cm,int mb_row,int mb_col)277 static BLOCK_SIZE get_bsize(const AV1_COMMON *cm, int mb_row, int mb_col) {
278   if (mi_size_wide[BLOCK_16X16] * mb_col + mi_size_wide[BLOCK_8X8] <
279       cm->mi_cols) {
280     return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] <
281                    cm->mi_rows
282                ? BLOCK_16X16
283                : BLOCK_16X8;
284   } else {
285     return mi_size_wide[BLOCK_16X16] * mb_row + mi_size_wide[BLOCK_8X8] <
286                    cm->mi_rows
287                ? BLOCK_8X16
288                : BLOCK_8X8;
289   }
290 }
291 
// Looks up the quantizer index used for the first pass: the result of
// av1_find_qindex() for FIRST_PASS_Q at |bit_depth|, searched over the whole
// index range [0, QINDEX_RANGE - 1].
static int find_fp_qindex(aom_bit_depth_t bit_depth) {
  const int lowest_qindex = 0;
  const int highest_qindex = QINDEX_RANGE - 1;
  return av1_find_qindex(FIRST_PASS_Q, bit_depth, lowest_qindex,
                         highest_qindex);
}
295 
raw_motion_error_stdev(int * raw_motion_err_list,int raw_motion_err_counts)296 static double raw_motion_error_stdev(int *raw_motion_err_list,
297                                      int raw_motion_err_counts) {
298   int64_t sum_raw_err = 0;
299   double raw_err_avg = 0;
300   double raw_err_stdev = 0;
301   if (raw_motion_err_counts == 0) return 0;
302 
303   int i;
304   for (i = 0; i < raw_motion_err_counts; i++) {
305     sum_raw_err += raw_motion_err_list[i];
306   }
307   raw_err_avg = (double)sum_raw_err / raw_motion_err_counts;
308   for (i = 0; i < raw_motion_err_counts; i++) {
309     raw_err_stdev += (raw_motion_err_list[i] - raw_err_avg) *
310                      (raw_motion_err_list[i] - raw_err_avg);
311   }
312   // Calculate the standard deviation for the motion error of all the inter
313   // blocks of the 0,0 motion using the last source
314   // frame as the reference.
315   raw_err_stdev = sqrt(raw_err_stdev / raw_motion_err_counts);
316   return raw_err_stdev;
317 }
318 
319 #define UL_INTRA_THRESH 50
320 #define INVALID_ROW -1
// Runs the first-pass analysis on the current source frame and emits its
// statistics.
//
// The frame is walked in 16x16 macroblocks (edge blocks use the size chosen by
// get_bsize()). For every macroblock this function:
//   1. encodes a DC_PRED intra block and measures the residual energy,
//   2. on frames that are not intra-only, measures the 0,0 motion error
//      against the last reference frame and against the previous source
//      frame and, when the latter is above a fixed threshold, runs motion
//      searches against the last reference (and the golden reference when
//      one is available),
//   3. accumulates the intra/coded error totals and motion vector statistics.
// After the loop the per-frame FIRSTPASS_STATS record is filled in, emitted
// through output_stats() and added to twopass->total_stats. Finally the
// reference buffer map is updated and current_frame->frame_number is
// incremented.
//
// cpi:         encoder instance; its twopass state, reference map and frame
//              counter are modified.
// ts_duration: stored (converted to double) as the frame's duration.
void av1_first_pass(AV1_COMP *cpi, const int64_t ts_duration) {
  int mb_row, mb_col;
  MACROBLOCK *const x = &cpi->td.mb;
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const current_frame = &cm->current_frame;
  const SequenceHeader *const seq_params = &cm->seq_params;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  TileInfo tile;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const PICK_MODE_CONTEXT *ctx =
      &cpi->td.pc_root[MAX_MIB_SIZE_LOG2 - MIN_MIB_SIZE_LOG2]->none;
  int i;

  int recon_yoffset, src_yoffset, recon_uvoffset;
  int64_t intra_error = 0;
  int64_t frame_avg_wavelet_energy = 0;
  int64_t coded_error = 0;
  int64_t sr_coded_error = 0;

  int sum_mvr = 0, sum_mvc = 0;
  int sum_mvr_abs = 0, sum_mvc_abs = 0;
  int64_t sum_mvrs = 0, sum_mvcs = 0;
  int mvcount = 0;
  int intercount = 0;
  int second_ref_count = 0;
  const int intrapenalty = INTRA_MODE_PENALTY;
  double neutral_count;
  int intra_skip_count = 0;
  int image_data_start_row = INVALID_ROW;
  int new_mv_count = 0;
  int sum_in_vectors = 0;
  MV lastmv = kZeroMv;
  TWO_PASS *twopass = &cpi->twopass;
  int recon_y_stride, src_y_stride, recon_uv_stride, uv_mb_height;

  const YV12_BUFFER_CONFIG *const lst_yv12 =
      get_ref_frame_yv12_buf(cm, LAST_FRAME);
  const YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_yv12_buf(cm, GOLDEN_FRAME);
  YV12_BUFFER_CONFIG *const new_yv12 = &cm->cur_frame->buf;
  const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12;
  double intra_factor;
  double brightness_factor;
  const int qindex = find_fp_qindex(seq_params->bit_depth);
  const int mb_scale = mi_size_wide[BLOCK_16X16];

  // One raw (source vs. previous source) motion error slot per macroblock;
  // only the first raw_motion_err_counts entries are filled in.
  int *raw_motion_err_list;
  int raw_motion_err_counts = 0;
  CHECK_MEM_ERROR(
      cm, raw_motion_err_list,
      aom_calloc(cm->mb_rows * cm->mb_cols, sizeof(*raw_motion_err_list)));
  // First pass code requires valid last and new frame buffers.
  assert(new_yv12 != NULL);
  assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));

  av1_setup_frame_size(cpi);
  aom_clear_system_state();

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;
  x->e_mbd.mi[0]->sb_type = BLOCK_16X16;

  intra_factor = 0.0;
  brightness_factor = 0.0;
  neutral_count = 0.0;

  // Do not use periodic key frames.
  cpi->rc.frames_to_key = INT_MAX;

  av1_set_quantizer(cm, qindex);

  av1_setup_block_planes(&x->e_mbd, seq_params->subsampling_x,
                         seq_params->subsampling_y, num_planes);

  av1_setup_src_planes(x, cpi->source, 0, 0, num_planes,
                       x->e_mbd.mi[0]->sb_type);
  av1_setup_dst_planes(xd->plane, seq_params->sb_size, new_yv12, 0, 0, 0,
                       num_planes);

  if (!frame_is_intra_only(cm)) {
    av1_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL, num_planes);
  }

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  // Don't store luma on the first pass since chroma is not computed
  xd->cfl.store_y = 0;
  av1_frame_init_quantizer(cpi);

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  av1_init_mv_probs(cm);
  av1_initialize_rd_consts(cpi);

  // Tiling is ignored in the first pass.
  av1_tile_init(&tile, cm, 0, 0);
  src_y_stride = cpi->source->y_stride;
  recon_y_stride = new_yv12->y_stride;
  recon_uv_stride = new_yv12->uv_stride;
  // 8 when the chroma planes are shorter than luma, 16 otherwise.
  uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height);

  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
    MV best_ref_mv = kZeroMv;

    // Reset above block coeffs.
    xd->up_available = (mb_row != 0);
    recon_yoffset = (mb_row * recon_y_stride * 16);
    src_yoffset = (mb_row * src_y_stride * 16);
    recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height);

    // Set up limit values for motion vectors to prevent them extending
    // outside the UMV borders.
    x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16);
    x->mv_limits.row_max =
        ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16;

    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
      int this_error;
      // True only for blocks in the first row or first column, excluding the
      // top-left block.
      const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
      const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col);
      double log_intra;
      int level_sample;

      aom_clear_system_state();

      const int idx_str = xd->mi_stride * mb_row * mb_scale + mb_col * mb_scale;
      xd->mi = cm->mi_grid_visible + idx_str;
      xd->mi[0] = cm->mi + idx_str;
      xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset;
      xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset;
      xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset;
      xd->left_available = (mb_col != 0);
      xd->mi[0]->sb_type = bsize;
      xd->mi[0]->ref_frame[0] = INTRA_FRAME;
      set_mi_row_col(xd, &tile, mb_row * mb_scale, mi_size_high[bsize],
                     mb_col * mb_scale, mi_size_wide[bsize], cm->mi_rows,
                     cm->mi_cols);

      set_plane_n4(xd, mi_size_wide[bsize], mi_size_high[bsize], num_planes);

      // Do intra 16x16 prediction.
      xd->mi[0]->segment_id = 0;
      xd->lossless[xd->mi[0]->segment_id] = (qindex == 0);
      xd->mi[0]->mode = DC_PRED;
      xd->mi[0]->tx_size =
          use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4;
      av1_encode_intra_block_plane(cpi, x, bsize, 0, 0, mb_row * 2, mb_col * 2);
      this_error = aom_get_mb_ss(x->plane[0].src_diff);

      if (this_error < UL_INTRA_THRESH) {
        ++intra_skip_count;
      } else if ((mb_col > 0) && (image_data_start_row == INVALID_ROW)) {
        image_data_start_row = mb_row;
      }

      // Scale the error down for 10-bit and 12-bit input.
      if (seq_params->use_highbitdepth) {
        switch (seq_params->bit_depth) {
          case AOM_BITS_8: break;
          case AOM_BITS_10: this_error >>= 4; break;
          case AOM_BITS_12: this_error >>= 8; break;
          default:
            assert(0 &&
                   "seq_params->bit_depth should be AOM_BITS_8, "
                   "AOM_BITS_10 or AOM_BITS_12");
            // Release the per-frame scratch list before bailing out so the
            // early return does not leak it.
            aom_free(raw_motion_err_list);
            return;
        }
      }

      aom_clear_system_state();
      log_intra = log(this_error + 1.0);
      if (log_intra < 10.0)
        intra_factor += 1.0 + ((10.0 - log_intra) * 0.05);
      else
        intra_factor += 1.0;

      // Sample the first source pixel of the block as its brightness level.
      if (seq_params->use_highbitdepth)
        level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0];
      else
        level_sample = x->plane[0].src.buf[0];
      if ((level_sample < DARK_THRESH) && (log_intra < 9.0))
        brightness_factor += 1.0 + (0.01 * (DARK_THRESH - level_sample));
      else
        brightness_factor += 1.0;

      // Intrapenalty below deals with situations where the intra and inter
      // error scores are very low (e.g. a plain black frame).
      // We do not have special cases in first pass for 0,0 and nearest etc so
      // all inter modes carry an overhead cost estimate for the mv.
      // When the error score is very low this causes us to pick all or lots of
      // INTRA modes and throw lots of key frames.
      // This penalty adds a cost matching that of a 0,0 mv to the intra case.
      this_error += intrapenalty;

      // Accumulate the intra error.
      intra_error += (int64_t)this_error;

      // Accumulate the wavelet energy of the four 8x8 quadrants of the source
      // macroblock.
      const int hbd = is_cur_buf_hbd(xd);
      const int stride = x->plane[0].src.stride;
      uint8_t *buf = x->plane[0].src.buf;
      for (int r8 = 0; r8 < 2; ++r8) {
        for (int c8 = 0; c8 < 2; ++c8) {
          frame_avg_wavelet_energy += av1_haar_ac_sad_8x8_uint8_input(
              buf + c8 * 8 + r8 * 8 * stride, stride, hbd);
        }
      }

      // Set up limit values for motion vectors to prevent them extending
      // outside the UMV borders.
      x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16);
      x->mv_limits.col_max =
          ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16;

      if (!frame_is_intra_only(cm)) {  // Do a motion search
        int tmp_err, motion_error, raw_motion_error;
        // Assume 0,0 motion with no mv overhead.
        MV mv = kZeroMv, tmp_mv = kZeroMv;
        struct buf_2d unscaled_last_source_buf_2d;

        xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
        if (is_cur_buf_hbd(xd)) {
          motion_error = highbd_get_prediction_error(
              bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
        } else {
          motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                              &xd->plane[0].pre[0]);
        }

        // Compute the motion error of the 0,0 motion using the last source
        // frame as the reference. Skip the further motion search on
        // reconstructed frame if this error is small.
        unscaled_last_source_buf_2d.buf =
            cpi->unscaled_last_source->y_buffer + src_yoffset;
        unscaled_last_source_buf_2d.stride =
            cpi->unscaled_last_source->y_stride;
        if (is_cur_buf_hbd(xd)) {
          raw_motion_error = highbd_get_prediction_error(
              bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd);
        } else {
          raw_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                  &unscaled_last_source_buf_2d);
        }

        // TODO(pengchong): Replace the hard-coded threshold
        if (raw_motion_error > 25) {
          // Test last reference frame using the previous best mv as the
          // starting point (best reference) for the search.
          first_pass_motion_search(cpi, x, &best_ref_mv, &mv, &motion_error);

          // If the current best reference mv is not centered on 0,0 then do a
          // 0,0 based search as well.
          if (!is_zero_mv(&best_ref_mv)) {
            tmp_err = INT_MAX;
            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv, &tmp_err);

            if (tmp_err < motion_error) {
              motion_error = tmp_err;
              mv = tmp_mv;
            }
          }

          // Search in an older reference frame.
          if ((current_frame->frame_number > 1) && gld_yv12 != NULL) {
            // Assume 0,0 motion with no mv overhead.
            int gf_motion_error;

            xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset;
            if (is_cur_buf_hbd(xd)) {
              gf_motion_error = highbd_get_prediction_error(
                  bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd);
            } else {
              gf_motion_error = get_prediction_error(bsize, &x->plane[0].src,
                                                     &xd->plane[0].pre[0]);
            }

            first_pass_motion_search(cpi, x, &kZeroMv, &tmp_mv,
                                     &gf_motion_error);

            if (gf_motion_error < motion_error && gf_motion_error < this_error)
              ++second_ref_count;

            // Reset to last frame as reference buffer.
            xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset;
            xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset;
            xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset;

            // In accumulating a score for the older reference frame take the
            // best of the motion predicted score and the intra coded error
            // (just as will be done for) accumulation of "coded_error" for
            // the last frame.
            if (gf_motion_error < this_error)
              sr_coded_error += gf_motion_error;
            else
              sr_coded_error += this_error;
          } else {
            sr_coded_error += motion_error;
          }
        } else {
          sr_coded_error += motion_error;
        }

        // Start by assuming that intra mode is best.
        best_ref_mv.row = 0;
        best_ref_mv.col = 0;

        if (motion_error <= this_error) {
          aom_clear_system_state();

          // Keep a count of cases where the inter and intra were very close
          // and very low. This helps with scene cut detection for example in
          // cropped clips with black bars at the sides or top and bottom.
          if (((this_error - intrapenalty) * 9 <= motion_error * 10) &&
              (this_error < (2 * intrapenalty))) {
            neutral_count += 1.0;
            // Also track cases where the intra is not much worse than the inter
            // and use this in limiting the GF/arf group length.
          } else if ((this_error > NCOUNT_INTRA_THRESH) &&
                     (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) {
            neutral_count +=
                (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error);
          }

          // Scale the full-pel search result by 8 before storing it.
          mv.row *= 8;
          mv.col *= 8;
          this_error = motion_error;
          xd->mi[0]->mode = NEWMV;
          xd->mi[0]->mv[0].as_mv = mv;
          xd->mi[0]->tx_size = TX_4X4;
          xd->mi[0]->ref_frame[0] = LAST_FRAME;
          xd->mi[0]->ref_frame[1] = NONE_FRAME;
          av1_enc_build_inter_predictor(cm, xd, mb_row * mb_scale,
                                        mb_col * mb_scale, NULL, bsize,
                                        AOM_PLANE_Y, AOM_PLANE_Y);
          av1_encode_sby_pass1(cm, x, bsize);
          sum_mvr += mv.row;
          sum_mvr_abs += abs(mv.row);
          sum_mvc += mv.col;
          sum_mvc_abs += abs(mv.col);
          sum_mvrs += mv.row * mv.row;
          sum_mvcs += mv.col * mv.col;
          ++intercount;

          best_ref_mv = mv;

          if (!is_zero_mv(&mv)) {
            ++mvcount;

            // Non-zero vector, was it different from the last non zero vector?
            if (!is_equal_mv(&mv, &lastmv)) ++new_mv_count;
            lastmv = mv;

            // Does the row vector point inwards or outwards?
            if (mb_row < cm->mb_rows / 2) {
              if (mv.row > 0)
                --sum_in_vectors;
              else if (mv.row < 0)
                ++sum_in_vectors;
            } else if (mb_row > cm->mb_rows / 2) {
              if (mv.row > 0)
                ++sum_in_vectors;
              else if (mv.row < 0)
                --sum_in_vectors;
            }

            // Does the col vector point inwards or outwards?
            if (mb_col < cm->mb_cols / 2) {
              if (mv.col > 0)
                --sum_in_vectors;
              else if (mv.col < 0)
                ++sum_in_vectors;
            } else if (mb_col > cm->mb_cols / 2) {
              if (mv.col > 0)
                ++sum_in_vectors;
              else if (mv.col < 0)
                --sum_in_vectors;
            }
          }
        }
        raw_motion_err_list[raw_motion_err_counts++] = raw_motion_error;
      } else {
        sr_coded_error += (int64_t)this_error;
      }
      coded_error += (int64_t)this_error;

      // Adjust to the next column of MBs.
      x->plane[0].src.buf += 16;
      x->plane[1].src.buf += uv_mb_height;
      x->plane[2].src.buf += uv_mb_height;

      recon_yoffset += 16;
      src_yoffset += 16;
      recon_uvoffset += uv_mb_height;
    }
    // Adjust to the next row of MBs. Each plane advances by its own stride.
    x->plane[0].src.buf += 16 * x->plane[0].src.stride - 16 * cm->mb_cols;
    x->plane[1].src.buf +=
        uv_mb_height * x->plane[1].src.stride - uv_mb_height * cm->mb_cols;
    x->plane[2].src.buf +=
        uv_mb_height * x->plane[2].src.stride - uv_mb_height * cm->mb_cols;

    aom_clear_system_state();
  }
  const double raw_err_stdev =
      raw_motion_error_stdev(raw_motion_err_list, raw_motion_err_counts);
  aom_free(raw_motion_err_list);

  // Clamp the image start to rows/2. This number of rows is discarded top
  // and bottom as dead data so rows / 2 means the frame is blank.
  if ((image_data_start_row > cm->mb_rows / 2) ||
      (image_data_start_row == INVALID_ROW)) {
    image_data_start_row = cm->mb_rows / 2;
  }
  // Exclude any image dead zone
  if (image_data_start_row > 0) {
    intra_skip_count =
        AOMMAX(0, intra_skip_count - (image_data_start_row * cm->mb_cols * 2));
  }

  {
    FIRSTPASS_STATS fps;
    // The minimum error here ensures some bit allocation to frames even
    // in static regions. The allocation per MB declines for larger formats
    // where the typical "real" energy per MB also falls.
    // Initial estimate here uses sqrt(mbs) to define the min_err, where the
    // number of mbs is proportional to the image area.
    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
                            ? cpi->initial_mbs
                            : cpi->common.MBs;
    const double min_err = 200 * sqrt(num_mbs);

    intra_factor = intra_factor / (double)num_mbs;
    brightness_factor = brightness_factor / (double)num_mbs;
    fps.weight = intra_factor * brightness_factor;

    fps.frame = current_frame->frame_number;
    fps.coded_error = (double)(coded_error >> 8) + min_err;
    fps.sr_coded_error = (double)(sr_coded_error >> 8) + min_err;
    fps.intra_error = (double)(intra_error >> 8) + min_err;
    fps.frame_avg_wavelet_energy = (double)frame_avg_wavelet_energy;
    fps.count = 1.0;
    fps.pcnt_inter = (double)intercount / num_mbs;
    fps.pcnt_second_ref = (double)second_ref_count / num_mbs;
    fps.pcnt_neutral = (double)neutral_count / num_mbs;
    fps.intra_skip_pct = (double)intra_skip_count / num_mbs;
    fps.inactive_zone_rows = (double)image_data_start_row;
    fps.inactive_zone_cols = (double)0;  // TODO(paulwilkins): fix
    fps.raw_error_stdev = raw_err_stdev;

    if (mvcount > 0) {
      fps.MVr = (double)sum_mvr / mvcount;
      fps.mvr_abs = (double)sum_mvr_abs / mvcount;
      fps.MVc = (double)sum_mvc / mvcount;
      fps.mvc_abs = (double)sum_mvc_abs / mvcount;
      fps.MVrv =
          ((double)sum_mvrs - ((double)sum_mvr * sum_mvr / mvcount)) / mvcount;
      fps.MVcv =
          ((double)sum_mvcs - ((double)sum_mvc * sum_mvc / mvcount)) / mvcount;
      fps.mv_in_out_count = (double)sum_in_vectors / (mvcount * 2);
      fps.new_mv_count = new_mv_count;
      fps.pcnt_motion = (double)mvcount / num_mbs;
    } else {
      fps.MVr = 0.0;
      fps.mvr_abs = 0.0;
      fps.MVc = 0.0;
      fps.mvc_abs = 0.0;
      fps.MVrv = 0.0;
      fps.MVcv = 0.0;
      fps.mv_in_out_count = 0.0;
      fps.new_mv_count = 0.0;
      fps.pcnt_motion = 0.0;
    }

    // TODO(paulwilkins):  Handle the case when duration is set to 0, or
    // something less than the full time between subsequent values of
    // cpi->source_time_stamp.
    fps.duration = (double)ts_duration;

    // Don't want to do output stats with a stack variable!
    twopass->this_frame_stats = fps;
    output_stats(&twopass->this_frame_stats, cpi->output_pkt_list);
    accumulate_stats(&twopass->total_stats, &fps);
  }

  // Copy the previous Last Frame back into gf and arf buffers if
  // the prediction is good enough... but also don't allow it to lag too far.
  if ((twopass->sr_update_lag > 3) ||
      ((current_frame->frame_number > 0) &&
       (twopass->this_frame_stats.pcnt_inter > 0.20) &&
       ((twopass->this_frame_stats.intra_error /
         DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
    if (gld_yv12 != NULL) {
      assign_frame_buffer_p(
          &cm->ref_frame_map[get_ref_frame_map_idx(cm, GOLDEN_FRAME)],
          cm->ref_frame_map[get_ref_frame_map_idx(cm, LAST_FRAME)]);
    }
    twopass->sr_update_lag = 1;
  } else {
    ++twopass->sr_update_lag;
  }

  aom_extend_frame_borders(new_yv12, num_planes);

  // The frame we just compressed now becomes the last frame.
  assign_frame_buffer_p(
      &cm->ref_frame_map[get_ref_frame_map_idx(cm, LAST_FRAME)], cm->cur_frame);

  // Special case for the first frame. Copy into the GF buffer as a second
  // reference.
  if (current_frame->frame_number == 0 &&
      get_ref_frame_map_idx(cm, GOLDEN_FRAME) != INVALID_IDX) {
    assign_frame_buffer_p(
        &cm->ref_frame_map[get_ref_frame_map_idx(cm, GOLDEN_FRAME)],
        cm->ref_frame_map[get_ref_frame_map_idx(cm, LAST_FRAME)]);
  }

  // Use this to see what the first pass reconstruction looks like.
  if (0) {
    char filename[512];
    FILE *recon_file;
    snprintf(filename, sizeof(filename), "enc%04d.yuv",
             (int)current_frame->frame_number);

    if (current_frame->frame_number == 0)
      recon_file = fopen(filename, "wb");
    else
      recon_file = fopen(filename, "ab");

    // Debug dump only: skip silently if the file cannot be opened.
    if (recon_file != NULL) {
      (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
      fclose(recon_file);
    }
  }

  ++current_frame->frame_number;
}
861