1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <limits.h>
13 #include <float.h>
14 #include <math.h>
15 #include <stdbool.h>
16 #include <stdio.h>
17 
18 #include "config/aom_config.h"
19 #include "config/aom_dsp_rtcd.h"
20 #include "config/av1_rtcd.h"
21 
22 #include "aom_dsp/aom_dsp_common.h"
23 #include "aom_dsp/binary_codes_writer.h"
24 #include "aom_ports/mem.h"
25 #include "aom_ports/aom_timer.h"
26 #include "aom_ports/system_state.h"
27 
28 #if CONFIG_MISMATCH_DEBUG
29 #include "aom_util/debug_util.h"
30 #endif  // CONFIG_MISMATCH_DEBUG
31 
32 #include "av1/common/cfl.h"
33 #include "av1/common/common.h"
34 #include "av1/common/entropy.h"
35 #include "av1/common/entropymode.h"
36 #include "av1/common/idct.h"
37 #include "av1/common/mv.h"
38 #include "av1/common/mvref_common.h"
39 #include "av1/common/pred_common.h"
40 #include "av1/common/quant_common.h"
41 #include "av1/common/reconintra.h"
42 #include "av1/common/reconinter.h"
43 #include "av1/common/seg_common.h"
44 #include "av1/common/tile_common.h"
45 #include "av1/common/warped_motion.h"
46 
47 #include "av1/encoder/aq_complexity.h"
48 #include "av1/encoder/aq_cyclicrefresh.h"
49 #include "av1/encoder/aq_variance.h"
50 #include "av1/encoder/global_motion.h"
51 #include "av1/encoder/encodeframe.h"
52 #include "av1/encoder/encodemb.h"
53 #include "av1/encoder/encodemv.h"
54 #include "av1/encoder/encodetxb.h"
55 #include "av1/encoder/ethread.h"
56 #include "av1/encoder/extend.h"
57 #include "av1/encoder/ml.h"
58 #include "av1/encoder/partition_strategy.h"
59 #include "av1/encoder/partition_model_weights.h"
60 #include "av1/encoder/rd.h"
61 #include "av1/encoder/rdopt.h"
62 #include "av1/encoder/reconinter_enc.h"
63 #include "av1/encoder/segmentation.h"
64 #include "av1/encoder/tokenize.h"
65 #include "av1/encoder/var_based_part.h"
66 
// Forward declarations of file-local (static) functions so that they can be
// referenced before their definitions appear.
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int *rate);
static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
                               const MACROBLOCK *const x,
                               const RD_STATS *const rd_stats,
                               unsigned int pb_source_variance);
75 
// This is used as a reference when computing the source variance for the
//  purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines,
//  which will be faster.
//
// One row of constant samples, every entry equal to 128. It has external
// linkage (not static). In this file it is handed to the variance function
// with a reference stride of 0 by av1_get_sby_perpixel_variance().
const uint8_t AV1_VAR_OFFS[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
91 
// 16-bit counterpart of AV1_VAR_OFFS: a constant row whose entries are all
// 128. av1_high_get_sby_perpixel_variance() selects it for bit depth 8 and as
// the fallback for any bit depth other than 10 or 12.
static const uint16_t AV1_HIGH_VAR_OFFS_8[MAX_SB_SIZE] = {
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
103 
// Constant reference row for bit depth 10: every entry is 128 * 4 (= 512),
// i.e. the 8-bit value 128 scaled by 1 << (10 - 8). Selected by
// av1_high_get_sby_perpixel_variance() when bd == 10.
static const uint16_t AV1_HIGH_VAR_OFFS_10[MAX_SB_SIZE] = {
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4,
  128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4
};
122 
// Constant reference row for bit depth 12: every entry is 128 * 16 (= 2048),
// i.e. the 8-bit value 128 scaled by 1 << (12 - 8). Selected by
// av1_high_get_sby_perpixel_variance() when bd == 12.
static const uint16_t AV1_HIGH_VAR_OFFS_12[MAX_SB_SIZE] = {
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16,
  128 * 16, 128 * 16
};
144 
// Returns the variance of the block in `ref` (block size `bs`) measured
// against the constant AV1_VAR_OFFS row, divided with rounding by the number
// of pixels in the block (2^num_pels_log2_lookup[bs]).
unsigned int av1_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  // The variance function also reports an SSE value; it is not needed here.
  unsigned int unused_sse;
  // The reference is passed with stride 0 so the single constant row is used
  // for the whole block.
  const unsigned int block_var = cpi->fn_ptr[bs].vf(
      ref->buf, ref->stride, AV1_VAR_OFFS, 0, &unused_sse);

  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
153 
// High-bit-depth variant of av1_get_sby_perpixel_variance().
//
// Chooses the constant reference row that matches bit depth `bd` (10 or 12;
// anything else, including 8, uses the 8-bit-valued row), computes the block
// variance against it, and returns that variance divided with rounding by the
// number of pixels in a block of size `bs`.
unsigned int av1_high_get_sby_perpixel_variance(const AV1_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd) {
  const uint16_t *flat_row;
  switch (bd) {
    case 10: flat_row = AV1_HIGH_VAR_OFFS_10; break;
    case 12: flat_row = AV1_HIGH_VAR_OFFS_12; break;
    case 8:
    default: flat_row = AV1_HIGH_VAR_OFFS_8; break;
  }

  // The variance function also reports an SSE value; it is not needed here.
  unsigned int unused_sse;
  const unsigned int block_var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(flat_row), 0,
                         &unused_sse);

  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
178 
// Returns the per-pixel variance of the difference between the block in `ref`
// and the block at the same (mi_row, mi_col) position in the Y plane of the
// LAST_FRAME reference buffer.
static unsigned int get_sby_perpixel_diff_variance(const AV1_COMP *const cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  const YV12_BUFFER_CONFIG *const last_frame =
      get_ref_frame_yv12_buf(&cpi->common, LAST_FRAME);
  assert(last_frame != NULL);

  // Convert the mode-info coordinates to a pixel offset in the Y plane.
  uint8_t *const colocated_y =
      last_frame->y_buffer +
      (mi_row * MI_SIZE * last_frame->y_stride + mi_col * MI_SIZE);

  unsigned int unused_sse;
  const unsigned int diff_var = cpi->fn_ptr[bs].vf(
      ref->buf, ref->stride, colocated_y, last_frame->y_stride, &unused_sse);

  return ROUND_POWER_OF_TWO(diff_var, num_pels_log2_lookup[bs]);
}
194 
get_rd_var_based_fixed_partition(AV1_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)195 static BLOCK_SIZE get_rd_var_based_fixed_partition(AV1_COMP *cpi, MACROBLOCK *x,
196                                                    int mi_row, int mi_col) {
197   unsigned int var = get_sby_perpixel_diff_variance(
198       cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64);
199   if (var < 8)
200     return BLOCK_64X64;
201   else if (var < 128)
202     return BLOCK_32X32;
203   else if (var < 2048)
204     return BLOCK_16X16;
205   else
206     return BLOCK_8X8;
207 }
208 
// Prepares the per-block encoder state in `x` (and its embedded decoder-side
// descriptor x->e_mbd) for the block of size `bsize` located at mode-info
// position (mi_row, mi_col) inside `tile`.
//
// This covers mode-info pointers, entropy/transform contexts, destination and
// source plane pointers, motion vector limits, frame-edge distances, the
// default RD multiplier, and the CfL position. It does not touch the segment
// id; set_offsets() does that on top of this function.
static void set_offsets_without_segment_id(const AV1_COMP *const cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x, int mi_row,
                                           int mi_col, BLOCK_SIZE bsize) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCKD *const xd = &x->e_mbd;
  // Block dimensions in mode-info units.
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);

  set_skip_context(xd, mi_row, mi_col, num_planes);
  // The above context is indexed per tile row and by mi column; the left
  // context is indexed by the mi row masked with MAX_MIB_MASK.
  xd->above_txfm_context = cm->above_txfm_context[tile->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  // Set up destination pointers.
  av1_setup_dst_planes(xd->plane, bsize, &cm->cur_frame->buf, mi_row, mi_col, 0,
                       num_planes);

  // Set up limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
  x->mv_limits.row_min =
      -(((mi_row + mi_height) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.col_min = -(((mi_col + mi_width) * MI_SIZE) + AOM_INTERP_EXTEND);
  x->mv_limits.row_max = (cm->mi_rows - mi_row) * MI_SIZE + AOM_INTERP_EXTEND;
  x->mv_limits.col_max = (cm->mi_cols - mi_col) * MI_SIZE + AOM_INTERP_EXTEND;

  set_plane_n4(xd, mi_width, mi_height, num_planes);

  // Set up distance of MB to edge of frame in 1/8th pel units.
  // The block position must be aligned to the block's own dimensions.
  assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1)));
  set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows,
                 cm->mi_cols);

  // Set up source buffers.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);

  // R/D setup.
  x->rdmult = cpi->rd.RDMULT;

  // required by av1_append_sub8x8_mvs_for_idx() and av1_find_best_ref_mvs()
  xd->tile = *tile;

  // Record the block position for the CfL state.
  xd->cfl.mi_row = mi_row;
  xd->cfl.mi_col = mi_col;
}
257 
// Prepares the per-block state via set_offsets_without_segment_id() and then
// assigns the block's segment id.
//
// The segment id defaults to 0. When segmentation is enabled and
// cpi->vaq_refresh is not set, it is looked up in the segmentation map (the
// encoder's own map if seg->update_map is set, otherwise the previous frame's
// map; a missing map leaves the id at 0). Whenever segmentation is enabled the
// plane quantizers are re-initialised for the resulting segment id.
static void set_offsets(const AV1_COMP *const cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  set_offsets_without_segment_id(cpi, tile, x, mi_row, mi_col, bsize);

  const AV1_COMMON *const cm = &cpi->common;
  const struct segmentation *const seg = &cm->seg;
  // Must be read after the call above, which sets up the mode-info pointers.
  MB_MODE_INFO *const mbmi = x->e_mbd.mi[0];

  // Setup segment ID.
  mbmi->segment_id = 0;
  if (!seg->enabled) return;

  if (!cpi->vaq_refresh) {
    const uint8_t *const seg_map =
        seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
    if (seg_map) {
      mbmi->segment_id = get_segment_id(cm, seg_map, bsize, mi_row, mi_col);
    }
  }
  av1_init_plane_quantizers(cpi, x, mbmi->segment_id);
}
281 
// Records the interpolation filter chosen for `mbmi` in each of the two filter
// directions: increments the matching switchable_interp counter in `counts`
// and, when `allow_update_cdf` is non-zero, adapts the corresponding CDF in
// the tile context.
static void update_filter_type_count(uint8_t allow_update_cdf,
                                     FRAME_COUNTS *counts,
                                     const MACROBLOCKD *xd,
                                     const MB_MODE_INFO *mbmi) {
  for (int d = 0; d < 2; ++d) {
    const int pred_ctx = av1_get_pred_context_switchable_interp(xd, d);
    const InterpFilter chosen =
        av1_extract_interp_filter(mbmi->interp_filters, d);

    ++counts->switchable_interp[pred_ctx][chosen];

    if (!allow_update_cdf) continue;
    update_cdf(xd->tile_ctx->switchable_interp_cdf[pred_ctx], chosen,
               SWITCHABLE_FILTERS);
  }
}
297 
// For blocks coded with GLOBALMV or GLOBAL_GLOBALMV, adds the block's area
// (in mode-info units, width * height) to rdc->global_motion_used for each
// reference frame the block uses. Other modes leave `rdc` untouched.
static void update_global_motion_used(PREDICTION_MODE mode, BLOCK_SIZE bsize,
                                      const MB_MODE_INFO *mbmi,
                                      RD_COUNTS *rdc) {
  if (mode != GLOBALMV && mode != GLOBAL_GLOBALMV) return;

  const int block_area = mi_size_wide[bsize] * mi_size_high[bsize];
  // One reference, plus a second one for compound prediction.
  for (int r = 0; r < 1 + has_second_ref(mbmi); ++r) {
    rdc->global_motion_used[mbmi->ref_frame[r]] += block_area;
  }
}
309 
// Resets the transform configuration of `mbmi` to a state consistent with
// `tx_mode` and the block's segment:
//  - lossless segment:      tx_size = TX_4X4
//  - tx_mode is SELECT:     tx_size is raised to at least the size given by
//                           depth_to_tx_size(MAX_TX_DEPTH, block size)
//  - any other tx_mode:     tx_size is derived from the block size and tx_mode
// For inter blocks every inter_tx_size entry is set to that tx_size. All
// transform types are reset to DCT_DCT, and the per-block skip flags plus
// x->skip are cleared.
static void reset_tx_size(MACROBLOCK *x, MB_MODE_INFO *mbmi,
                          const TX_MODE tx_mode) {
  const MACROBLOCKD *const xd = &x->e_mbd;

  if (xd->lossless[mbmi->segment_id]) {
    mbmi->tx_size = TX_4X4;
  } else if (tx_mode == TX_MODE_SELECT) {
    const TX_SIZE smallest_allowed =
        depth_to_tx_size(MAX_TX_DEPTH, mbmi->sb_type);
    mbmi->tx_size = (TX_SIZE)TXSIZEMAX(mbmi->tx_size, smallest_allowed);
  } else {
    mbmi->tx_size = tx_size_from_tx_mode(mbmi->sb_type, tx_mode);
  }

  if (is_inter_block(mbmi)) {
    memset(mbmi->inter_tx_size, mbmi->tx_size, sizeof(mbmi->inter_tx_size));
  }
  memset(mbmi->txk_type, DCT_DCT, sizeof(mbmi->txk_type[0]) * TXK_TYPE_BUF_LEN);

  av1_zero(x->blk_skip);
  x->skip = 0;
}
329 
// Commits the mode decision stored in `ctx` for the block of size `bsize` at
// (mi_row, mi_col) into the encoder state held in `td`.
//
// Always performed:
//  - copy the chosen mode info, extended mode info, block skip flags and the
//    skip flag from `ctx` into the current block state;
//  - with segmentation enabled, refresh the segment id (COMPLEXITY_AQ) or the
//    cyclic-refresh segment map (CYCLIC_REFRESH_AQ), and fall back from CfL to
//    DC chroma prediction if CfL is no longer allowed;
//  - point the plane coefficient buffers and colour index maps at the ones
//    owned by `ctx`;
//  - make every in-frame mode-info slot covered by the block point at the
//    block's mode info.
//
// Skipped when `dry_run` is non-zero: statistics updates (mode counts, global
// motion usage, interpolation filter counts, prediction-type RD differences)
// and the copy of motion vectors into the frame MV buffer.
static void update_state(const AV1_COMP *const cpi,
                         const TileDataEnc *const tile_data, ThreadData *td,
                         const PICK_MODE_CONTEXT *const ctx, int mi_row,
                         int mi_col, BLOCK_SIZE bsize, RUN_TYPE dry_run) {
  int i, x_idx, y;
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  // `mi` is the decision saved in the context; `mi_addr` is where it is
  // written to.
  const MB_MODE_INFO *const mi = &ctx->mic;
  MB_MODE_INFO *const mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = mi_size_wide[mi->sb_type];
  const int bh = mi_size_high[mi->sb_type];
  const int mis = cm->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];

  assert(mi->sb_type == bsize);

  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  memcpy(x->blk_skip, ctx->blk_skip, sizeof(x->blk_skip[0]) * ctx->num_4x4_blk);

  x->skip = ctx->skip;

  // If segmentation in use
  if (seg->enabled) {
    // For in frame complexity AQ copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map =
          seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
      mi_addr->segment_id =
          map ? get_segment_id(cm, map, bsize, mi_row, mi_col) : 0;
      reset_tx_size(x, mi_addr, cm->tx_mode);
    }
    // Else for cyclic refresh mode update the segment map, set the segment id
    // and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      av1_cyclic_refresh_update_segment(cpi, mi_addr, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip);
    }
    // Replace CfL chroma prediction with DC prediction if CfL is not allowed
    // for the block as it now stands.
    if (mi_addr->uv_mode == UV_CFL_PRED && !is_cfl_allowed(xd))
      mi_addr->uv_mode = UV_DC_PRED;
  }

  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }
  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];
  // Restore the coding context of the MB to that which was in place
  // when the mode was picked for it. Only slots that pass the edge-distance
  // test below are written.
  for (y = 0; y < mi_height; y++)
    for (x_idx = 0; x_idx < mi_width; x_idx++)
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx &&
          (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }

  if (cpi->oxcf.aq_mode) av1_init_plane_quantizers(cpi, x, mi_addr->segment_id);

  // Everything below only updates statistics and frame-level buffers, which
  // must not happen on a dry run.
  if (dry_run) return;

#if CONFIG_INTERNAL_STATS
  {
    unsigned int *const mode_chosen_counts =
        (unsigned int *)cpi->mode_chosen_counts;  // Cast const away.
    if (frame_is_intra_only(cm)) {
      // Maps an intra prediction mode (used as the index) to the THR_* entry
      // counted for it.
      static const int kf_mode_index[] = {
        THR_DC /*DC_PRED*/,
        THR_V_PRED /*V_PRED*/,
        THR_H_PRED /*H_PRED*/,
        THR_D45_PRED /*D45_PRED*/,
        THR_D135_PRED /*D135_PRED*/,
        THR_D113_PRED /*D113_PRED*/,
        THR_D157_PRED /*D157_PRED*/,
        THR_D203_PRED /*D203_PRED*/,
        THR_D67_PRED /*D67_PRED*/,
        THR_SMOOTH,   /*SMOOTH_PRED*/
        THR_SMOOTH_V, /*SMOOTH_V_PRED*/
        THR_SMOOTH_H, /*SMOOTH_H_PRED*/
        THR_PAETH /*PAETH_PRED*/,
      };
      ++mode_chosen_counts[kf_mode_index[mi_addr->mode]];
    } else {
      // Note how often each mode chosen as best
      ++mode_chosen_counts[ctx->best_mode_index];
    }
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mi_addr)) {
      // TODO(sarahparker): global motion stats need to be handled per-tile
      // to be compatible with tile-based threading.
      update_global_motion_used(mi_addr->mode, bsize, mi_addr, rdc);
    }

    // Filter statistics are only gathered when the frame uses switchable
    // filters and the block is neither warped-causal nor using non-translational
    // global motion.
    if (cm->interp_filter == SWITCHABLE &&
        mi_addr->motion_mode != WARPED_CAUSAL &&
        !is_nontrans_global_motion(xd, xd->mi[0])) {
      update_filter_type_count(tile_data->allow_update_cdf, td->counts, xd,
                               mi_addr);
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;
  }

  // Clamp the block extent to the frame before copying the motion vectors.
  const int x_mis = AOMMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = AOMMIN(bh, cm->mi_rows - mi_row);
  av1_copy_frame_mvs(cm, mi, mi_row, mi_col, x_mis, y_mis);
}
451 
// Makes `src` the current source frame of `x` and points each plane's source
// buffer descriptor at the block of size `bsize` located at (mi_row, mi_col).
// Plane 0 uses the first entry of the crop/stride arrays; every other plane
// uses the second.
void av1_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col, const int num_planes,
                          BLOCK_SIZE bsize) {
  // Set current frame pointer.
  x->e_mbd.cur_buf = src;

  // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
  // the static analysis warnings.
  for (int plane = 0; plane < AOMMIN(num_planes, MAX_MB_PLANE); ++plane) {
    const int uv_idx = (plane != 0);
    const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane];
    setup_pred_plane(&x->plane[plane].src, bsize, src->buffers[plane],
                     src->crop_widths[uv_idx], src->crop_heights[uv_idx],
                     src->strides[uv_idx], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
468 
// Re-initialises the plane quantizers of `x` for `segment_id` and returns the
// RD multiplier computed for that segment's quantizer index (offset by the
// frame's luma DC delta-q).
static int set_segment_rdmult(const AV1_COMP *const cpi, MACROBLOCK *const x,
                              int8_t segment_id) {
  av1_init_plane_quantizers(cpi, x, segment_id);
  aom_clear_system_state();

  const AV1_COMMON *const cm = &cpi->common;
  const int seg_qindex = av1_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  return av1_compute_rd_mult(cpi, seg_qindex + cm->y_dc_delta_q);
}
477 
// Returns the RD multiplier for the quantizer index obtained by adding the
// block's delta qindex and the frame's luma DC delta-q to the base qindex.
static int set_deltaq_rdmult(const AV1_COMP *const cpi, MACROBLOCKD *const xd) {
  const AV1_COMMON *const cm = &cpi->common;
  const int block_qindex =
      cm->base_qindex + xd->delta_qindex + cm->y_dc_delta_q;

  return av1_compute_rd_mult(cpi, block_qindex);
}
484 
edge_info(const struct buf_2d * ref,const BLOCK_SIZE bsize,const bool high_bd,const int bd)485 static EdgeInfo edge_info(const struct buf_2d *ref, const BLOCK_SIZE bsize,
486                           const bool high_bd, const int bd) {
487   const int width = block_size_wide[bsize];
488   const int height = block_size_high[bsize];
489   // Implementation requires width to be a multiple of 8. It also requires
490   // height to be a multiple of 4, but this is always the case.
491   assert(height % 4 == 0);
492   if (width % 8 != 0) {
493     EdgeInfo ei = { .magnitude = 0, .x = 0, .y = 0 };
494     return ei;
495   }
496   return av1_edge_exists(ref->buf, ref->stride, width, height, high_bd, bd);
497 }
498 
use_pb_simple_motion_pred_sse(const AV1_COMP * const cpi)499 static int use_pb_simple_motion_pred_sse(const AV1_COMP *const cpi) {
500   // TODO(debargha, yuec): Not in use, need to implement a speed feature
501   // utilizing this data point, and replace '0' by the corresponding speed
502   // feature flag.
503   return 0 && !frame_is_intra_only(&cpi->common);
504 }
505 
// Picks the coding mode for the block of size `bsize` at (mi_row, mi_col) and
// stores the outcome in `rd_cost` and `ctx`.
//
// Parameters:
//   cpi, tile_data       encoder and tile state.
//   x                    per-thread macroblock state; its block offsets, plane
//                        buffers, source variance and edge strength fields are
//                        set up here.
//   rd_cost              receives rate, distortion and RD cost of the choice.
//   partition            partition type recorded in the block's mode info.
//   ctx                  pick-mode context; supplies the coefficient buffers
//                        and receives the resulting rate/dist/rdcost.
//   best_rd              passed to the mode search; a negative value means no
//                        search is attempted.
//   use_nonrd_pick_mode  selects av1_nonrd_pick_inter_mode_sb() instead of
//                        av1_rd_pick_inter_mode_sb() for inter frames.
//
// Early exits:
//   - best_rd < 0: `ctx->rdcost` is set to INT64_MAX, `ctx->skip` to 0 and
//     `rd_cost` is invalidated.
//   - ctx->rd_mode_is_ready: the mode info and RD numbers already stored in
//     `ctx` are copied out and no search is run.
//
// x->rdmult may be replaced for the duration of the search (AQ, TPL or
// delta-q) and is restored to its previous value afterwards.
//
// With CONFIG_COLLECT_COMPONENT_TIMING, every start_timing() call for
// rd_pick_sb_modes_time is paired with an end_timing() call on each path that
// leaves the function after the timer has been started.
static void pick_sb_modes(AV1_COMP *const cpi, TileDataEnc *tile_data,
                          MACROBLOCK *const x, int mi_row, int mi_col,
                          RD_STATS *rd_cost, PARTITION_TYPE partition,
                          BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                          int64_t best_rd, int use_nonrd_pick_mode) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO *mbmi;
  MB_MODE_INFO *ctx_mbmi = &ctx->mic;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  const DELTAQ_MODE deltaq_mode = cpi->oxcf.deltaq_mode;
  int i, orig_rdmult;

  // Nothing to search: report an invalid result. This check comes before the
  // timer is started so the trivial exit does not leave it unbalanced.
  if (best_rd < 0) {
    ctx->rdcost = INT64_MAX;
    ctx->skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }

#if CONFIG_COLLECT_COMPONENT_TIMING
  start_timing(cpi, rd_pick_sb_modes_time);
#endif

  aom_clear_system_state();

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  mbmi = xd->mi[0];

  if (ctx->rd_mode_is_ready) {
    // Reuse the decision already stored in the context.
    assert(ctx_mbmi->sb_type == bsize);
    assert(ctx_mbmi->partition == partition);
    *mbmi = *ctx_mbmi;
    rd_cost->rate = ctx->rate;
    rd_cost->dist = ctx->dist;
    rd_cost->rdcost = ctx->rdcost;
  } else {
    mbmi->sb_type = bsize;
    mbmi->partition = partition;
  }

#if CONFIG_RD_DEBUG
  mbmi->mi_row = mi_row;
  mbmi->mi_col = mi_col;
#endif

  // Point the plane buffers at the storage owned by the context.
  for (i = 0; i < num_planes; ++i) {
    p[i].coeff = ctx->coeff[i];
    p[i].qcoeff = ctx->qcoeff[i];
    pd[i].dqcoeff = ctx->dqcoeff[i];
    p[i].eobs = ctx->eobs[i];
    p[i].txb_entropy_ctx = ctx->txb_entropy_ctx[i];
  }

  for (i = 0; i < 2; ++i) pd[i].color_index_map = ctx->color_index_map[i];

  if (!ctx->rd_mode_is_ready) {
    ctx->skippable = 0;

    // Set to zero to make sure we do not use the previous encoded frame stats
    mbmi->skip = 0;

    // Reset skip mode flag.
    mbmi->skip_mode = 0;
  }

  x->skip_chroma_rd =
      !is_chroma_reference(mi_row, mi_col, bsize, xd->plane[1].subsampling_x,
                           xd->plane[1].subsampling_y);

  if (ctx->rd_mode_is_ready) {
    x->skip = ctx->skip;
    *x->mbmi_ext = ctx->mbmi_ext;
#if CONFIG_COLLECT_COMPONENT_TIMING
    // Close the timer opened above before leaving through this early exit.
    end_timing(cpi, rd_pick_sb_modes_time);
#endif
    return;
  }

  if (is_cur_buf_hbd(xd)) {
    x->source_variance = av1_high_get_sby_perpixel_variance(
        cpi, &x->plane[0].src, bsize, xd->bd);
  } else {
    x->source_variance =
        av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  if (use_pb_simple_motion_pred_sse(cpi)) {
    const MV ref_mv_full = { .row = 0, .col = 0 };
    unsigned int var = 0;
    av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0,
                              &x->simple_motion_pred_sse, &var);
  }

  // If the threshold for disabling wedge search is zero, it means the feature
  // should not be used. Use a value that will always succeed in the check.
  if (cpi->sf.disable_wedge_search_edge_thresh == 0) {
    x->edge_strength = UINT16_MAX;
    x->edge_strength_x = UINT16_MAX;
    x->edge_strength_y = UINT16_MAX;
  } else {
    EdgeInfo ei =
        edge_info(&x->plane[0].src, bsize, is_cur_buf_hbd(xd), xd->bd);
    x->edge_strength = ei.magnitude;
    x->edge_strength_x = ei.x;
    x->edge_strength_y = ei.y;
  }
  // Save rdmult before it might be changed, so it can be restored later.
  orig_rdmult = x->rdmult;

  if (aq_mode == VARIANCE_AQ) {
    if (cpi->vaq_refresh) {
      // Blocks up to 16x16 reuse the stored energy; larger ones compute it.
      const int energy = bsize <= BLOCK_16X16
                             ? x->mb_energy
                             : av1_log_block_var(cpi, x, bsize);
      mbmi->segment_id = energy;
    }
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == COMPLEXITY_AQ) {
    x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
  } else if (aq_mode == CYCLIC_REFRESH_AQ) {
    // If segment is boosted, use rdmult for that segment.
    if (cyclic_refresh_segment_id_boosted(mbmi->segment_id))
      x->rdmult = av1_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  } else if (cpi->oxcf.enable_tpl_model) {
    x->rdmult = x->cb_rdmult;
  }

  // Delta-q, when enabled, overrides whatever multiplier was chosen above.
  if (deltaq_mode > 0) x->rdmult = set_deltaq_rdmult(cpi, xd);

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
    av1_rd_pick_intra_mode_sb(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx,
                              best_rd);
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_intra_mode_sb_time);
#endif
  } else {
#if CONFIG_COLLECT_COMPONENT_TIMING
    start_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
    if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
      av1_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, mi_row, mi_col,
                                         rd_cost, bsize, ctx, best_rd);
    } else {
      // TODO(kyslov): do the same for pick_intra_mode and
      //               pick_inter_mode_sb_seg_skip
      if (use_nonrd_pick_mode) {
        av1_nonrd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                     bsize, ctx, best_rd);
      } else {
        av1_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                                  bsize, ctx, best_rd);
      }
    }
#if CONFIG_COLLECT_COMPONENT_TIMING
    end_timing(cpi, av1_rd_pick_inter_mode_sb_time);
#endif
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) &&
      (bsize >= BLOCK_16X16) &&
      (cm->current_frame.frame_type == KEY_FRAME ||
       cpi->refresh_alt_ref_frame || cpi->refresh_alt2_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    av1_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = orig_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX) rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
  ctx->rdcost = rd_cost->rdcost;

#if CONFIG_COLLECT_COMPONENT_TIMING
  end_timing(cpi, rd_pick_sb_modes_time);
#endif
}
694 
// Updates the adaptive CDFs (and, with CONFIG_ENTROPY_STATS, the counters)
// for the chain of binary decisions that describe a single-reference inter
// mode:
//   1. NEWMV or not,
//   2. if not NEWMV: GLOBALMV or not,
//   3. if neither: NEARESTMV or not.
// Each decision uses its own context extracted from `mode_context`. CDFs are
// only touched when `allow_update_cdf` is non-zero.
static void update_inter_mode_stats(FRAME_CONTEXT *fc, FRAME_COUNTS *counts,
                                    PREDICTION_MODE mode, int16_t mode_context,
                                    uint8_t allow_update_cdf) {
  (void)counts;

  // Decision 1: NEWMV is coded as symbol 0, anything else as symbol 1.
  const int16_t newmv_ctx = mode_context & NEWMV_CTX_MASK;
  const int not_newmv = (mode != NEWMV);
#if CONFIG_ENTROPY_STATS
  ++counts->newmv_mode[newmv_ctx][not_newmv];
#endif
  if (allow_update_cdf) update_cdf(fc->newmv_cdf[newmv_ctx], not_newmv, 2);
  if (!not_newmv) return;

  // Decision 2: GLOBALMV is coded as symbol 0, anything else as symbol 1.
  const int16_t zeromv_ctx =
      (mode_context >> GLOBALMV_OFFSET) & GLOBALMV_CTX_MASK;
  const int not_globalmv = (mode != GLOBALMV);
#if CONFIG_ENTROPY_STATS
  ++counts->zeromv_mode[zeromv_ctx][not_globalmv];
#endif
  if (allow_update_cdf) update_cdf(fc->zeromv_cdf[zeromv_ctx], not_globalmv, 2);
  if (!not_globalmv) return;

  // Decision 3: NEARESTMV is coded as symbol 0, anything else as symbol 1.
  const int16_t refmv_ctx = (mode_context >> REFMV_OFFSET) & REFMV_CTX_MASK;
#if CONFIG_ENTROPY_STATS
  ++counts->refmv_mode[refmv_ctx][mode != NEARESTMV];
#endif
  if (allow_update_cdf) {
    update_cdf(fc->refmv_cdf[refmv_ctx], mode != NEARESTMV, 2);
  }
}
734 
// Records the palette signalling of an intra block.
//
// For luma (only when the luma mode is DC_PRED) and for chroma (only when the
// chroma mode is UV_DC_PRED) this records whether a palette is used and, if it
// is, the palette size expressed as an offset from PALETTE_MIN_SIZE.
//
// xd               - supplies the tile frame context and the luma palette
//                    mode context.
// mbmi             - mode info of the block being recorded.
// counts           - symbol counters; touched only under CONFIG_ENTROPY_STATS.
// allow_update_cdf - when zero, no CDF is adapted.
static void update_palette_cdf(MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                               FRAME_COUNTS *counts, uint8_t allow_update_cdf) {
  (void)counts;  // Only referenced when CONFIG_ENTROPY_STATS is enabled.

  FRAME_CONTEXT *const tile_fc = xd->tile_ctx;
  const PALETTE_MODE_INFO *const palette = &mbmi->palette_mode_info;
  const int bsize_ctx = av1_get_palette_bsize_ctx(mbmi->sb_type);
  const int y_colors = palette->palette_size[0];

  if (mbmi->mode == DC_PRED) {
    const int y_mode_ctx = av1_get_palette_mode_ctx(xd);
    const int y_used = (y_colors > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_y_mode[bsize_ctx][y_mode_ctx][y_used];
    if (y_used) {
      ++counts->palette_y_size[bsize_ctx][y_colors - PALETTE_MIN_SIZE];
    }
#endif
    if (allow_update_cdf) {
      update_cdf(tile_fc->palette_y_mode_cdf[bsize_ctx][y_mode_ctx], y_used,
                 2);
      if (y_used) {
        update_cdf(tile_fc->palette_y_size_cdf[bsize_ctx],
                   y_colors - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }

  if (mbmi->uv_mode == UV_DC_PRED) {
    const int uv_colors = palette->palette_size[1];
    // The chroma flag is conditioned on whether luma uses a palette.
    const int uv_mode_ctx = (y_colors > 0);
    const int uv_used = (uv_colors > 0);

#if CONFIG_ENTROPY_STATS
    ++counts->palette_uv_mode[uv_mode_ctx][uv_used];
    if (uv_used) {
      ++counts->palette_uv_size[bsize_ctx][uv_colors - PALETTE_MIN_SIZE];
    }
#endif
    if (allow_update_cdf) {
      update_cdf(tile_fc->palette_uv_mode_cdf[uv_mode_ctx], uv_used, 2);
      if (uv_used) {
        update_cdf(tile_fc->palette_uv_size_cdf[bsize_ctx],
                   uv_colors - PALETTE_MIN_SIZE, PALETTE_SIZES);
      }
    }
  }
}
786 
// Records the intra-prediction syntax elements of one block: luma mode,
// filter-intra flag/mode, luma angle delta and, for blocks that are a chroma
// reference, the chroma mode, CfL parameters, chroma angle delta and palette
// information.
//
// cm               - common encoder/decoder state.
// counts           - symbol counters; touched only under CONFIG_ENTROPY_STATS.
// xd               - supplies the tile frame context and plane subsampling.
// mbmi             - mode info of the block being recorded.
// above_mi/left_mi - neighbouring mode infos used for the luma mode context
//                    when intraonly is non-zero.
// intraonly        - non-zero selects the neighbour-conditioned luma mode CDF,
//                    zero selects the block-size-group conditioned one.
// mi_row/mi_col    - block position, used for the chroma-reference test.
// allow_update_cdf - when zero, no CDF is adapted.
static void sum_intra_stats(const AV1_COMMON *const cm, FRAME_COUNTS *counts,
                            MACROBLOCKD *xd, const MB_MODE_INFO *const mbmi,
                            const MB_MODE_INFO *above_mi,
                            const MB_MODE_INFO *left_mi, const int intraonly,
                            const int mi_row, const int mi_col,
                            uint8_t allow_update_cdf) {
  (void)counts;  // Only referenced when CONFIG_ENTROPY_STATS is enabled.

  FRAME_CONTEXT *const fc = xd->tile_ctx;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  const PREDICTION_MODE y_mode = mbmi->mode;
  const UV_PREDICTION_MODE uv_mode = mbmi->uv_mode;

  // --- Luma prediction mode ---
#if CONFIG_ENTROPY_STATS
  if (intraonly) {
    const int above_ctx = intra_mode_context[av1_above_block_mode(above_mi)];
    const int left_ctx = intra_mode_context[av1_left_block_mode(left_mi)];
    ++counts->kf_y_mode[above_ctx][left_ctx][y_mode];
  } else {
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  }
#endif  // CONFIG_ENTROPY_STATS
  if (allow_update_cdf) {
    aom_cdf_prob *const y_cdf = intraonly
                                    ? get_y_mode_cdf(fc, above_mi, left_mi)
                                    : fc->y_mode_cdf[size_group_lookup[bsize]];
    update_cdf(y_cdf, y_mode, INTRA_MODES);
  }

  // --- Filter intra ---
  if (av1_filter_intra_allowed(cm, mbmi)) {
    const int use_filter_intra = mbmi->filter_intra_mode_info.use_filter_intra;

#if CONFIG_ENTROPY_STATS
    ++counts->filter_intra[bsize][use_filter_intra];
#endif  // CONFIG_ENTROPY_STATS
    if (allow_update_cdf) {
      update_cdf(fc->filter_intra_cdfs[bsize], use_filter_intra, 2);
    }

    if (use_filter_intra) {
#if CONFIG_ENTROPY_STATS
      ++counts
            ->filter_intra_mode[mbmi->filter_intra_mode_info.filter_intra_mode];
#endif  // CONFIG_ENTROPY_STATS
      if (allow_update_cdf) {
        update_cdf(fc->filter_intra_mode_cdf,
                   mbmi->filter_intra_mode_info.filter_intra_mode,
                   FILTER_INTRA_MODES);
      }
    }
  }

  // --- Luma angle delta (directional modes only) ---
  if (av1_is_directional_mode(y_mode) && av1_use_angle_delta(bsize)) {
    // Shift the signed delta into a non-negative symbol.
    const int y_delta_sym = mbmi->angle_delta[PLANE_TYPE_Y] + MAX_ANGLE_DELTA;
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[y_mode - V_PRED][y_delta_sym];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[y_mode - V_PRED], y_delta_sym,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }

  // Everything below is chroma signalling; nothing more to record for blocks
  // that are not a chroma reference.
  if (!is_chroma_reference(mi_row, mi_col, bsize,
                           xd->plane[AOM_PLANE_U].subsampling_x,
                           xd->plane[AOM_PLANE_U].subsampling_y)) {
    return;
  }

  // --- Chroma prediction mode ---
#if CONFIG_ENTROPY_STATS
  ++counts->uv_mode[is_cfl_allowed(xd)][y_mode][uv_mode];
#endif  // CONFIG_ENTROPY_STATS
  if (allow_update_cdf) {
    const CFL_ALLOWED_TYPE cfl_allowed = is_cfl_allowed(xd);
    // The alphabet is one symbol smaller when CfL is not allowed.
    update_cdf(fc->uv_mode_cdf[cfl_allowed][y_mode], uv_mode,
               UV_INTRA_MODES - !cfl_allowed);
  }

  // --- CfL joint sign and per-plane alpha index ---
  if (uv_mode == UV_CFL_PRED) {
    const int signs = mbmi->cfl_alpha_signs;
    const int alpha_idx = mbmi->cfl_alpha_idx;

#if CONFIG_ENTROPY_STATS
    ++counts->cfl_sign[signs];
#endif
    if (allow_update_cdf) update_cdf(fc->cfl_sign_cdf, signs, CFL_JOINT_SIGNS);

    if (CFL_SIGN_U(signs) != CFL_SIGN_ZERO) {
      aom_cdf_prob *const u_cdf = fc->cfl_alpha_cdf[CFL_CONTEXT_U(signs)];
#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_U(signs)][CFL_IDX_U(alpha_idx)];
#endif
      if (allow_update_cdf) {
        update_cdf(u_cdf, CFL_IDX_U(alpha_idx), CFL_ALPHABET_SIZE);
      }
    }

    if (CFL_SIGN_V(signs) != CFL_SIGN_ZERO) {
      aom_cdf_prob *const v_cdf = fc->cfl_alpha_cdf[CFL_CONTEXT_V(signs)];
#if CONFIG_ENTROPY_STATS
      ++counts->cfl_alpha[CFL_CONTEXT_V(signs)][CFL_IDX_V(alpha_idx)];
#endif
      if (allow_update_cdf) {
        update_cdf(v_cdf, CFL_IDX_V(alpha_idx), CFL_ALPHABET_SIZE);
      }
    }
  }

  // --- Chroma angle delta (directional modes only) ---
  if (av1_is_directional_mode(get_uv_mode(uv_mode)) &&
      av1_use_angle_delta(bsize)) {
    const int uv_delta_sym =
        mbmi->angle_delta[PLANE_TYPE_UV] + MAX_ANGLE_DELTA;
#if CONFIG_ENTROPY_STATS
    ++counts->angle_delta[uv_mode - UV_V_PRED][uv_delta_sym];
#endif
    if (allow_update_cdf) {
      update_cdf(fc->angle_delta_cdf[uv_mode - UV_V_PRED], uv_delta_sym,
                 2 * MAX_ANGLE_DELTA + 1);
    }
  }

  // --- Palette ---
  if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
    update_palette_cdf(xd, mbmi, counts, allow_update_cdf);
  }
}
905 
update_stats(const AV1_COMMON * const cm,TileDataEnc * tile_data,ThreadData * td,int mi_row,int mi_col)906 static void update_stats(const AV1_COMMON *const cm, TileDataEnc *tile_data,
907                          ThreadData *td, int mi_row, int mi_col) {
908   MACROBLOCK *x = &td->mb;
909   MACROBLOCKD *const xd = &x->e_mbd;
910   const MB_MODE_INFO *const mbmi = xd->mi[0];
911   const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
912   const CurrentFrame *const current_frame = &cm->current_frame;
913   const BLOCK_SIZE bsize = mbmi->sb_type;
914   FRAME_CONTEXT *fc = xd->tile_ctx;
915   const uint8_t allow_update_cdf = tile_data->allow_update_cdf;
916 
917   // delta quant applies to both intra and inter
918   const int super_block_upper_left =
919       ((mi_row & (cm->seq_params.mib_size - 1)) == 0) &&
920       ((mi_col & (cm->seq_params.mib_size - 1)) == 0);
921 
922   const int seg_ref_active =
923       segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME);
924 
925   if (current_frame->skip_mode_info.skip_mode_flag && !seg_ref_active &&
926       is_comp_ref_allowed(bsize)) {
927     const int skip_mode_ctx = av1_get_skip_mode_context(xd);
928 #if CONFIG_ENTROPY_STATS
929     td->counts->skip_mode[skip_mode_ctx][mbmi->skip_mode]++;
930 #endif
931     if (allow_update_cdf)
932       update_cdf(fc->skip_mode_cdfs[skip_mode_ctx], mbmi->skip_mode, 2);
933   }
934 
935   if (!mbmi->skip_mode) {
936     if (!seg_ref_active) {
937       const int skip_ctx = av1_get_skip_context(xd);
938 #if CONFIG_ENTROPY_STATS
939       td->counts->skip[skip_ctx][mbmi->skip]++;
940 #endif
941       if (allow_update_cdf) update_cdf(fc->skip_cdfs[skip_ctx], mbmi->skip, 2);
942     }
943   }
944 
945   const DeltaQInfo *const delta_q_info = &cm->delta_q_info;
946   if (delta_q_info->delta_q_present_flag &&
947       (bsize != cm->seq_params.sb_size || !mbmi->skip) &&
948       super_block_upper_left) {
949 #if CONFIG_ENTROPY_STATS
950     const int dq =
951         (mbmi->current_qindex - xd->current_qindex) / delta_q_info->delta_q_res;
952     const int absdq = abs(dq);
953     for (int i = 0; i < AOMMIN(absdq, DELTA_Q_SMALL); ++i) {
954       td->counts->delta_q[i][1]++;
955     }
956     if (absdq < DELTA_Q_SMALL) td->counts->delta_q[absdq][0]++;
957 #endif
958     xd->current_qindex = mbmi->current_qindex;
959     if (delta_q_info->delta_lf_present_flag) {
960       if (delta_q_info->delta_lf_multi) {
961         const int frame_lf_count =
962             av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
963         for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
964 #if CONFIG_ENTROPY_STATS
965           const int delta_lf = (mbmi->delta_lf[lf_id] - xd->delta_lf[lf_id]) /
966                                delta_q_info->delta_lf_res;
967           const int abs_delta_lf = abs(delta_lf);
968           for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
969             td->counts->delta_lf_multi[lf_id][i][1]++;
970           }
971           if (abs_delta_lf < DELTA_LF_SMALL)
972             td->counts->delta_lf_multi[lf_id][abs_delta_lf][0]++;
973 #endif
974           xd->delta_lf[lf_id] = mbmi->delta_lf[lf_id];
975         }
976       } else {
977 #if CONFIG_ENTROPY_STATS
978         const int delta_lf =
979             (mbmi->delta_lf_from_base - xd->delta_lf_from_base) /
980             delta_q_info->delta_lf_res;
981         const int abs_delta_lf = abs(delta_lf);
982         for (int i = 0; i < AOMMIN(abs_delta_lf, DELTA_LF_SMALL); ++i) {
983           td->counts->delta_lf[i][1]++;
984         }
985         if (abs_delta_lf < DELTA_LF_SMALL)
986           td->counts->delta_lf[abs_delta_lf][0]++;
987 #endif
988         xd->delta_lf_from_base = mbmi->delta_lf_from_base;
989       }
990     }
991   }
992 
993   if (!is_inter_block(mbmi)) {
994     sum_intra_stats(cm, td->counts, xd, mbmi, xd->above_mbmi, xd->left_mbmi,
995                     frame_is_intra_only(cm), mi_row, mi_col,
996                     tile_data->allow_update_cdf);
997   }
998 
999   if (av1_allow_intrabc(cm)) {
1000     if (allow_update_cdf)
1001       update_cdf(fc->intrabc_cdf, is_intrabc_block(mbmi), 2);
1002 #if CONFIG_ENTROPY_STATS
1003     ++td->counts->intrabc[is_intrabc_block(mbmi)];
1004 #endif  // CONFIG_ENTROPY_STATS
1005   }
1006 
1007   if (!frame_is_intra_only(cm)) {
1008     RD_COUNTS *rdc = &td->rd_counts;
1009 
1010     FRAME_COUNTS *const counts = td->counts;
1011 
1012     if (mbmi->skip_mode) {
1013       rdc->skip_mode_used_flag = 1;
1014       if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1015         assert(has_second_ref(mbmi));
1016         rdc->compound_ref_used_flag = 1;
1017       }
1018       set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1019       return;
1020     }
1021 
1022     const int inter_block = is_inter_block(mbmi);
1023 
1024     if (!seg_ref_active) {
1025 #if CONFIG_ENTROPY_STATS
1026       counts->intra_inter[av1_get_intra_inter_context(xd)][inter_block]++;
1027 #endif
1028       if (allow_update_cdf) {
1029         update_cdf(fc->intra_inter_cdf[av1_get_intra_inter_context(xd)],
1030                    inter_block, 2);
1031       }
1032       // If the segment reference feature is enabled we have only a single
1033       // reference frame allowed for the segment so exclude it from
1034       // the reference frame counts used to work out probabilities.
1035       if (inter_block) {
1036         const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
1037         const MV_REFERENCE_FRAME ref1 = mbmi->ref_frame[1];
1038 
1039         av1_collect_neighbors_ref_counts(xd);
1040 
1041         if (current_frame->reference_mode == REFERENCE_MODE_SELECT) {
1042           if (has_second_ref(mbmi))
1043             // This flag is also updated for 4x4 blocks
1044             rdc->compound_ref_used_flag = 1;
1045           if (is_comp_ref_allowed(bsize)) {
1046 #if CONFIG_ENTROPY_STATS
1047             counts->comp_inter[av1_get_reference_mode_context(xd)]
1048                               [has_second_ref(mbmi)]++;
1049 #endif  // CONFIG_ENTROPY_STATS
1050             if (allow_update_cdf) {
1051               update_cdf(av1_get_reference_mode_cdf(xd), has_second_ref(mbmi),
1052                          2);
1053             }
1054           }
1055         }
1056 
1057         if (has_second_ref(mbmi)) {
1058           const COMP_REFERENCE_TYPE comp_ref_type = has_uni_comp_refs(mbmi)
1059                                                         ? UNIDIR_COMP_REFERENCE
1060                                                         : BIDIR_COMP_REFERENCE;
1061           if (allow_update_cdf) {
1062             update_cdf(av1_get_comp_reference_type_cdf(xd), comp_ref_type,
1063                        COMP_REFERENCE_TYPES);
1064           }
1065 #if CONFIG_ENTROPY_STATS
1066           counts->comp_ref_type[av1_get_comp_reference_type_context(xd)]
1067                                [comp_ref_type]++;
1068 #endif  // CONFIG_ENTROPY_STATS
1069 
1070           if (comp_ref_type == UNIDIR_COMP_REFERENCE) {
1071             const int bit = (ref0 == BWDREF_FRAME);
1072             if (allow_update_cdf)
1073               update_cdf(av1_get_pred_cdf_uni_comp_ref_p(xd), bit, 2);
1074 #if CONFIG_ENTROPY_STATS
1075             counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p(xd)][0]
1076                                 [bit]++;
1077 #endif  // CONFIG_ENTROPY_STATS
1078             if (!bit) {
1079               const int bit1 = (ref1 == LAST3_FRAME || ref1 == GOLDEN_FRAME);
1080               if (allow_update_cdf)
1081                 update_cdf(av1_get_pred_cdf_uni_comp_ref_p1(xd), bit1, 2);
1082 #if CONFIG_ENTROPY_STATS
1083               counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p1(xd)][1]
1084                                   [bit1]++;
1085 #endif  // CONFIG_ENTROPY_STATS
1086               if (bit1) {
1087                 if (allow_update_cdf) {
1088                   update_cdf(av1_get_pred_cdf_uni_comp_ref_p2(xd),
1089                              ref1 == GOLDEN_FRAME, 2);
1090                 }
1091 #if CONFIG_ENTROPY_STATS
1092                 counts->uni_comp_ref[av1_get_pred_context_uni_comp_ref_p2(xd)]
1093                                     [2][ref1 == GOLDEN_FRAME]++;
1094 #endif  // CONFIG_ENTROPY_STATS
1095               }
1096             }
1097           } else {
1098             const int bit = (ref0 == GOLDEN_FRAME || ref0 == LAST3_FRAME);
1099             if (allow_update_cdf)
1100               update_cdf(av1_get_pred_cdf_comp_ref_p(xd), bit, 2);
1101 #if CONFIG_ENTROPY_STATS
1102             counts->comp_ref[av1_get_pred_context_comp_ref_p(xd)][0][bit]++;
1103 #endif  // CONFIG_ENTROPY_STATS
1104             if (!bit) {
1105               if (allow_update_cdf) {
1106                 update_cdf(av1_get_pred_cdf_comp_ref_p1(xd),
1107                            ref0 == LAST2_FRAME, 2);
1108               }
1109 #if CONFIG_ENTROPY_STATS
1110               counts->comp_ref[av1_get_pred_context_comp_ref_p1(xd)][1]
1111                               [ref0 == LAST2_FRAME]++;
1112 #endif  // CONFIG_ENTROPY_STATS
1113             } else {
1114               if (allow_update_cdf) {
1115                 update_cdf(av1_get_pred_cdf_comp_ref_p2(xd),
1116                            ref0 == GOLDEN_FRAME, 2);
1117               }
1118 #if CONFIG_ENTROPY_STATS
1119               counts->comp_ref[av1_get_pred_context_comp_ref_p2(xd)][2]
1120                               [ref0 == GOLDEN_FRAME]++;
1121 #endif  // CONFIG_ENTROPY_STATS
1122             }
1123             if (allow_update_cdf) {
1124               update_cdf(av1_get_pred_cdf_comp_bwdref_p(xd),
1125                          ref1 == ALTREF_FRAME, 2);
1126             }
1127 #if CONFIG_ENTROPY_STATS
1128             counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p(xd)][0]
1129                                [ref1 == ALTREF_FRAME]++;
1130 #endif  // CONFIG_ENTROPY_STATS
1131             if (ref1 != ALTREF_FRAME) {
1132               if (allow_update_cdf) {
1133                 update_cdf(av1_get_pred_cdf_comp_bwdref_p1(xd),
1134                            ref1 == ALTREF2_FRAME, 2);
1135               }
1136 #if CONFIG_ENTROPY_STATS
1137               counts->comp_bwdref[av1_get_pred_context_comp_bwdref_p1(xd)][1]
1138                                  [ref1 == ALTREF2_FRAME]++;
1139 #endif  // CONFIG_ENTROPY_STATS
1140             }
1141           }
1142         } else {
1143           const int bit = (ref0 >= BWDREF_FRAME);
1144           if (allow_update_cdf)
1145             update_cdf(av1_get_pred_cdf_single_ref_p1(xd), bit, 2);
1146 #if CONFIG_ENTROPY_STATS
1147           counts->single_ref[av1_get_pred_context_single_ref_p1(xd)][0][bit]++;
1148 #endif  // CONFIG_ENTROPY_STATS
1149           if (bit) {
1150             assert(ref0 <= ALTREF_FRAME);
1151             if (allow_update_cdf) {
1152               update_cdf(av1_get_pred_cdf_single_ref_p2(xd),
1153                          ref0 == ALTREF_FRAME, 2);
1154             }
1155 #if CONFIG_ENTROPY_STATS
1156             counts->single_ref[av1_get_pred_context_single_ref_p2(xd)][1]
1157                               [ref0 == ALTREF_FRAME]++;
1158 #endif  // CONFIG_ENTROPY_STATS
1159             if (ref0 != ALTREF_FRAME) {
1160               if (allow_update_cdf) {
1161                 update_cdf(av1_get_pred_cdf_single_ref_p6(xd),
1162                            ref0 == ALTREF2_FRAME, 2);
1163               }
1164 #if CONFIG_ENTROPY_STATS
1165               counts->single_ref[av1_get_pred_context_single_ref_p6(xd)][5]
1166                                 [ref0 == ALTREF2_FRAME]++;
1167 #endif  // CONFIG_ENTROPY_STATS
1168             }
1169           } else {
1170             const int bit1 = !(ref0 == LAST2_FRAME || ref0 == LAST_FRAME);
1171             if (allow_update_cdf)
1172               update_cdf(av1_get_pred_cdf_single_ref_p3(xd), bit1, 2);
1173 #if CONFIG_ENTROPY_STATS
1174             counts
1175                 ->single_ref[av1_get_pred_context_single_ref_p3(xd)][2][bit1]++;
1176 #endif  // CONFIG_ENTROPY_STATS
1177             if (!bit1) {
1178               if (allow_update_cdf) {
1179                 update_cdf(av1_get_pred_cdf_single_ref_p4(xd),
1180                            ref0 != LAST_FRAME, 2);
1181               }
1182 #if CONFIG_ENTROPY_STATS
1183               counts->single_ref[av1_get_pred_context_single_ref_p4(xd)][3]
1184                                 [ref0 != LAST_FRAME]++;
1185 #endif  // CONFIG_ENTROPY_STATS
1186             } else {
1187               if (allow_update_cdf) {
1188                 update_cdf(av1_get_pred_cdf_single_ref_p5(xd),
1189                            ref0 != LAST3_FRAME, 2);
1190               }
1191 #if CONFIG_ENTROPY_STATS
1192               counts->single_ref[av1_get_pred_context_single_ref_p5(xd)][4]
1193                                 [ref0 != LAST3_FRAME]++;
1194 #endif  // CONFIG_ENTROPY_STATS
1195             }
1196           }
1197         }
1198 
1199         if (cm->seq_params.enable_interintra_compound &&
1200             is_interintra_allowed(mbmi)) {
1201           const int bsize_group = size_group_lookup[bsize];
1202           if (mbmi->ref_frame[1] == INTRA_FRAME) {
1203 #if CONFIG_ENTROPY_STATS
1204             counts->interintra[bsize_group][1]++;
1205 #endif
1206             if (allow_update_cdf)
1207               update_cdf(fc->interintra_cdf[bsize_group], 1, 2);
1208 #if CONFIG_ENTROPY_STATS
1209             counts->interintra_mode[bsize_group][mbmi->interintra_mode]++;
1210 #endif
1211             if (allow_update_cdf) {
1212               update_cdf(fc->interintra_mode_cdf[bsize_group],
1213                          mbmi->interintra_mode, INTERINTRA_MODES);
1214             }
1215             if (is_interintra_wedge_used(bsize)) {
1216 #if CONFIG_ENTROPY_STATS
1217               counts->wedge_interintra[bsize][mbmi->use_wedge_interintra]++;
1218 #endif
1219               if (allow_update_cdf) {
1220                 update_cdf(fc->wedge_interintra_cdf[bsize],
1221                            mbmi->use_wedge_interintra, 2);
1222               }
1223               if (mbmi->use_wedge_interintra) {
1224 #if CONFIG_ENTROPY_STATS
1225                 counts->wedge_idx[bsize][mbmi->interintra_wedge_index]++;
1226 #endif
1227                 if (allow_update_cdf) {
1228                   update_cdf(fc->wedge_idx_cdf[bsize],
1229                              mbmi->interintra_wedge_index, 16);
1230                 }
1231               }
1232             }
1233           } else {
1234 #if CONFIG_ENTROPY_STATS
1235             counts->interintra[bsize_group][0]++;
1236 #endif
1237             if (allow_update_cdf)
1238               update_cdf(fc->interintra_cdf[bsize_group], 0, 2);
1239           }
1240         }
1241 
1242         set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
1243         const MOTION_MODE motion_allowed =
1244             cm->switchable_motion_mode
1245                 ? motion_mode_allowed(xd->global_motion, xd, mbmi,
1246                                       cm->allow_warped_motion)
1247                 : SIMPLE_TRANSLATION;
1248         if (mbmi->ref_frame[1] != INTRA_FRAME) {
1249           if (motion_allowed == WARPED_CAUSAL) {
1250 #if CONFIG_ENTROPY_STATS
1251             counts->motion_mode[bsize][mbmi->motion_mode]++;
1252 #endif
1253             if (allow_update_cdf) {
1254               update_cdf(fc->motion_mode_cdf[bsize], mbmi->motion_mode,
1255                          MOTION_MODES);
1256             }
1257           } else if (motion_allowed == OBMC_CAUSAL) {
1258 #if CONFIG_ENTROPY_STATS
1259             counts->obmc[bsize][mbmi->motion_mode == OBMC_CAUSAL]++;
1260 #endif
1261             if (allow_update_cdf) {
1262               update_cdf(fc->obmc_cdf[bsize], mbmi->motion_mode == OBMC_CAUSAL,
1263                          2);
1264             }
1265           }
1266         }
1267 
1268         if (has_second_ref(mbmi)) {
1269           assert(current_frame->reference_mode != SINGLE_REFERENCE &&
1270                  is_inter_compound_mode(mbmi->mode) &&
1271                  mbmi->motion_mode == SIMPLE_TRANSLATION);
1272 
1273           const int masked_compound_used =
1274               is_any_masked_compound_used(bsize) &&
1275               cm->seq_params.enable_masked_compound;
1276           if (masked_compound_used) {
1277             const int comp_group_idx_ctx = get_comp_group_idx_context(xd);
1278 #if CONFIG_ENTROPY_STATS
1279             ++counts->comp_group_idx[comp_group_idx_ctx][mbmi->comp_group_idx];
1280 #endif
1281             if (allow_update_cdf) {
1282               update_cdf(fc->comp_group_idx_cdf[comp_group_idx_ctx],
1283                          mbmi->comp_group_idx, 2);
1284             }
1285           }
1286 
1287           if (mbmi->comp_group_idx == 0) {
1288             const int comp_index_ctx = get_comp_index_context(cm, xd);
1289 #if CONFIG_ENTROPY_STATS
1290             ++counts->compound_index[comp_index_ctx][mbmi->compound_idx];
1291 #endif
1292             if (allow_update_cdf) {
1293               update_cdf(fc->compound_index_cdf[comp_index_ctx],
1294                          mbmi->compound_idx, 2);
1295             }
1296           } else {
1297             assert(masked_compound_used);
1298             if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1299 #if CONFIG_ENTROPY_STATS
1300               ++counts->compound_type[bsize][mbmi->interinter_comp.type -
1301                                              COMPOUND_WEDGE];
1302 #endif
1303               if (allow_update_cdf) {
1304                 update_cdf(fc->compound_type_cdf[bsize],
1305                            mbmi->interinter_comp.type - COMPOUND_WEDGE,
1306                            MASKED_COMPOUND_TYPES);
1307               }
1308             }
1309           }
1310         }
1311         if (mbmi->interinter_comp.type == COMPOUND_WEDGE) {
1312           if (is_interinter_compound_used(COMPOUND_WEDGE, bsize)) {
1313 #if CONFIG_ENTROPY_STATS
1314             counts->wedge_idx[bsize][mbmi->interinter_comp.wedge_index]++;
1315 #endif
1316             if (allow_update_cdf) {
1317               update_cdf(fc->wedge_idx_cdf[bsize],
1318                          mbmi->interinter_comp.wedge_index, 16);
1319             }
1320           }
1321         }
1322       }
1323     }
1324 
1325     if (inter_block &&
1326         !segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
1327       int16_t mode_ctx;
1328       const PREDICTION_MODE mode = mbmi->mode;
1329 
1330       mode_ctx =
1331           av1_mode_context_analyzer(mbmi_ext->mode_context, mbmi->ref_frame);
1332       if (has_second_ref(mbmi)) {
1333 #if CONFIG_ENTROPY_STATS
1334         ++counts->inter_compound_mode[mode_ctx][INTER_COMPOUND_OFFSET(mode)];
1335 #endif
1336         if (allow_update_cdf)
1337           update_cdf(fc->inter_compound_mode_cdf[mode_ctx],
1338                      INTER_COMPOUND_OFFSET(mode), INTER_COMPOUND_MODES);
1339       } else {
1340         update_inter_mode_stats(fc, counts, mode, mode_ctx, allow_update_cdf);
1341       }
1342 
1343       int mode_allowed = (mbmi->mode == NEWMV);
1344       mode_allowed |= (mbmi->mode == NEW_NEWMV);
1345       if (mode_allowed) {
1346         uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1347         int idx;
1348 
1349         for (idx = 0; idx < 2; ++idx) {
1350           if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1351 #if CONFIG_ENTROPY_STATS
1352             uint8_t drl_ctx =
1353                 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
1354             ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx];
1355 #endif
1356 
1357             if (mbmi->ref_mv_idx == idx) break;
1358           }
1359         }
1360       }
1361 
1362       if (have_nearmv_in_inter_mode(mbmi->mode)) {
1363         uint8_t ref_frame_type = av1_ref_frame_type(mbmi->ref_frame);
1364         int idx;
1365 
1366         for (idx = 1; idx < 3; ++idx) {
1367           if (mbmi_ext->ref_mv_count[ref_frame_type] > idx + 1) {
1368 #if CONFIG_ENTROPY_STATS
1369             uint8_t drl_ctx =
1370                 av1_drl_ctx(mbmi_ext->ref_mv_stack[ref_frame_type], idx);
1371             ++counts->drl_mode[drl_ctx][mbmi->ref_mv_idx != idx - 1];
1372 #endif
1373 
1374             if (mbmi->ref_mv_idx == idx - 1) break;
1375           }
1376         }
1377       }
1378     }
1379   }
1380 }
1381 
// Snapshot of the above/left coding contexts around one block, filled by
// save_context() and written back by restore_context().
typedef struct {
  // Copies of xd->above_context[] / xd->left_context[] for the block; each
  // plane's entries are stored back to back.
  ENTROPY_CONTEXT a[MAX_MIB_SIZE * MAX_MB_PLANE];
  ENTROPY_CONTEXT l[MAX_MIB_SIZE * MAX_MB_PLANE];
  // Copies of xd->above_seg_context / xd->left_seg_context for the block.
  PARTITION_CONTEXT sa[MAX_MIB_SIZE];
  PARTITION_CONTEXT sl[MAX_MIB_SIZE];
  // Values of the xd->above_txfm_context / xd->left_txfm_context pointers at
  // the time of the save.
  TXFM_CONTEXT *p_ta;
  TXFM_CONTEXT *p_tl;
  // Copies of the entries those two pointers referred to.
  TXFM_CONTEXT ta[MAX_MIB_SIZE];
  TXFM_CONTEXT tl[MAX_MIB_SIZE];
} RD_SEARCH_MACROBLOCK_CONTEXT;
1392 
// Writes a snapshot taken by save_context() back into the macroblock's
// above/left entropy, partition and transform contexts.
//
// x             - macroblock whose contexts (x->e_mbd) are overwritten.
// ctx           - snapshot to restore from.
// mi_row/mi_col - position of the block in mode-info units; the left
//                 contexts are indexed by mi_row & MAX_MIB_MASK.
// bsize         - size of the block, which determines how much is copied.
// num_planes    - number of planes whose entropy contexts are restored.
static void restore_context(MACROBLOCK *x,
                            const RD_SEARCH_MACROBLOCK_CONTEXT *ctx, int mi_row,
                            int mi_col, BLOCK_SIZE bsize,
                            const int num_planes) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int cols_4x4 = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int rows_4x4 = block_size_high[bsize] >> tx_size_high_log2[0];
  const int mi_cols = mi_size_wide[bsize];
  const int mi_rows = mi_size_high[bsize];
  const int left_row = mi_row & MAX_MIB_MASK;
  const size_t above_bytes = sizeof(ENTROPY_CONTEXT) * cols_4x4;
  const size_t left_bytes = sizeof(ENTROPY_CONTEXT) * rows_4x4;

  // Entropy contexts: offsets and lengths shrink with the plane's subsampling.
  for (int plane = 0; plane < num_planes; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    memcpy(xd->above_context[plane] + (mi_col >> ss_x),
           ctx->a + cols_4x4 * plane, above_bytes >> ss_x);
    memcpy(xd->left_context[plane] + (left_row >> ss_y),
           ctx->l + rows_4x4 * plane, left_bytes >> ss_y);
  }

  // Partition contexts.
  memcpy(xd->above_seg_context + mi_col, ctx->sa,
         sizeof(*xd->above_seg_context) * mi_cols);
  memcpy(xd->left_seg_context + left_row, ctx->sl,
         sizeof(xd->left_seg_context[0]) * mi_rows);

  // Transform contexts: reinstate the saved pointers first, then refill the
  // entries they point at.
  xd->above_txfm_context = ctx->p_ta;
  xd->left_txfm_context = ctx->p_tl;
  memcpy(xd->above_txfm_context, ctx->ta,
         sizeof(*xd->above_txfm_context) * mi_cols);
  memcpy(xd->left_txfm_context, ctx->tl,
         sizeof(*xd->left_txfm_context) * mi_rows);
}
1428 
// Takes a snapshot of the above/left entropy, partition and transform
// contexts surrounding a block so that restore_context() can put them back
// later.
//
// x             - macroblock whose contexts (x->e_mbd) are read.
// ctx           - destination snapshot.
// mi_row/mi_col - position of the block in mode-info units; the left
//                 contexts are indexed by mi_row & MAX_MIB_MASK.
// bsize         - size of the block, which determines how much is copied.
// num_planes    - number of planes whose entropy contexts are saved.
static void save_context(const MACROBLOCK *x, RD_SEARCH_MACROBLOCK_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         const int num_planes) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int cols_4x4 = block_size_wide[bsize] >> tx_size_wide_log2[0];
  const int rows_4x4 = block_size_high[bsize] >> tx_size_high_log2[0];
  const int mi_cols = mi_size_wide[bsize];
  const int mi_rows = mi_size_high[bsize];
  const int left_row = mi_row & MAX_MIB_MASK;
  const size_t above_bytes = sizeof(ENTROPY_CONTEXT) * cols_4x4;
  const size_t left_bytes = sizeof(ENTROPY_CONTEXT) * rows_4x4;

  // Entropy contexts: offsets and lengths shrink with the plane's subsampling.
  for (int plane = 0; plane < num_planes; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    memcpy(ctx->a + cols_4x4 * plane,
           xd->above_context[plane] + (mi_col >> ss_x), above_bytes >> ss_x);
    memcpy(ctx->l + rows_4x4 * plane,
           xd->left_context[plane] + (left_row >> ss_y), left_bytes >> ss_y);
  }

  // Partition contexts.
  memcpy(ctx->sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * mi_cols);
  memcpy(ctx->sl, xd->left_seg_context + left_row,
         sizeof(xd->left_seg_context[0]) * mi_rows);

  // Transform contexts: keep both the entries and the pointers themselves.
  memcpy(ctx->ta, xd->above_txfm_context,
         sizeof(*xd->above_txfm_context) * mi_cols);
  memcpy(ctx->tl, xd->left_txfm_context,
         sizeof(*xd->left_txfm_context) * mi_rows);
  ctx->p_ta = xd->above_txfm_context;
  ctx->p_tl = xd->left_txfm_context;
}
1465 
// Encodes a single block of size bsize at (mi_row, mi_col) using the mode
// decision stored in ctx.
//
// The block's offsets are set up, its partition type is recorded in the mode
// info, the picked state is committed with update_state() and the block is
// run through encode_superblock(). When dry_run is zero (a real encode) the
// coefficient buffer is selected beforehand and, afterwards, cb_offset is
// advanced, a few mode-info fields are finalized and update_stats() is called.
// `rate` is forwarded unchanged to encode_superblock().
static void encode_b(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                     ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col,
                     RUN_TYPE dry_run, BLOCK_SIZE bsize,
                     PARTITION_TYPE partition,
                     const PICK_MODE_CONTEXT *const ctx, int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  TileInfo *const tile = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;

  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);

  // xd->mi[0] is only read after set_offsets() has run for this block.
  MB_MODE_INFO *mbmi = xd->mi[0];
  mbmi->partition = partition;
  update_state(cpi, tile_data, td, ctx, mi_row, mi_col, bsize, dry_run);

  // With the TPL model on and neither AQ nor delta-q active, use the rdmult
  // kept in cb_rdmult.
  const int use_cb_rdmult = cpi->oxcf.enable_tpl_model &&
                            cpi->oxcf.aq_mode == NO_AQ &&
                            cpi->oxcf.deltaq_mode == 0;
  if (use_cb_rdmult) x->rdmult = x->cb_rdmult;

  if (!dry_run) av1_set_coeff_buffer(cpi, x, mi_row, mi_col);

  encode_superblock(cpi, tile_data, td, tp, dry_run, mi_row, mi_col, bsize,
                    rate);

  // Everything below only applies to a real (non dry-run) encode.
  if (dry_run) return;

  x->cb_offset += block_size_wide[bsize] * block_size_high[bsize];

  // A skipped block covering the whole superblock takes its delta loop-filter
  // values from xd when delta LF is signalled.
  const int copy_delta_lf = bsize == cm->seq_params.sb_size &&
                            mbmi->skip == 1 &&
                            cm->delta_q_info.delta_lf_present_flag;
  if (copy_delta_lf) {
    int frame_lf_count = FRAME_LF_COUNT;
    if (!(av1_num_planes(cm) > 1)) frame_lf_count -= 2;
    for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
      mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
    mbmi->delta_lf_from_base = xd->delta_lf_from_base;
  }

  // Derive comp_group_idx for compound blocks from the compound settings.
  if (has_second_ref(mbmi)) {
    const int is_group0 = mbmi->compound_idx == 0 ||
                          mbmi->interinter_comp.type == COMPOUND_AVERAGE;
    mbmi->comp_group_idx = is_group0 ? 0 : 1;
  }

  update_stats(cm, tile_data, td, mi_row, mi_col);
}
1510 
// Encodes the block at (mi_row, mi_col) according to the partitioning stored
// in pc_tree. Leaf blocks are handed to encode_b() with the matching
// PICK_MODE_CONTEXT; PARTITION_SPLIT recurses into the four quadrants.
//
// Nothing is done when the block starts outside the frame. For a real encode
// (dry_run == 0) of a block that is at least 8x8, the partition symbol
// counts/CDF are updated, but only when both block halves lie inside the
// frame. The partition context is updated at the end in every case.
static void encode_sb(const AV1_COMP *const cpi, ThreadData *td,
                      TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row,
                      int mi_col, RUN_TYPE dry_run, BLOCK_SIZE bsize,
                      PC_TREE *pc_tree, int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &td->mb.e_mbd;
  const int half = mi_size_wide[bsize] / 2;
  const int quarter = mi_size_wide[bsize] / 4;
  // -1 marks blocks below 8x8, for which no partition context is used.
  const int part_ctx = (bsize >= BLOCK_8X8)
                           ? partition_plane_context(xd, mi_row, mi_col, bsize)
                           : -1;
  const PARTITION_TYPE partition = pc_tree->partitioning;
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
  // Size of one quadrant; used by the A/B partition shapes.
  const BLOCK_SIZE quad_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  const int row_mid = mi_row + half;
  const int col_mid = mi_col + half;

  if (!dry_run && part_ctx >= 0 && row_mid < cm->mi_rows &&
      col_mid < cm->mi_cols) {
#if CONFIG_ENTROPY_STATS
    td->counts->partition[part_ctx][partition]++;
#endif

    if (tile_data->allow_update_cdf) {
      FRAME_CONTEXT *fc = xd->tile_ctx;
      update_cdf(fc->partition_cdf[part_ctx], partition,
                 partition_cdf_length(bsize));
    }
  }

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->none, rate);
      break;

    case PARTITION_VERT:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->vertical[0], rate);
      // The right half is only coded when it starts inside the frame.
      if (col_mid < cm->mi_cols) {
        encode_b(cpi, tile_data, td, tp, mi_row, col_mid, dry_run, subsize,
                 partition, &pc_tree->vertical[1], rate);
      }
      break;

    case PARTITION_HORZ:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontal[0], rate);
      // The bottom half is only coded when it starts inside the frame.
      if (row_mid < cm->mi_rows) {
        encode_b(cpi, tile_data, td, tp, row_mid, mi_col, dry_run, subsize,
                 partition, &pc_tree->horizontal[1], rate);
      }
      break;

    case PARTITION_SPLIT:
      // Quadrant order: top-left, top-right, bottom-left, bottom-right.
      for (int q = 0; q < 4; ++q) {
        const int q_row = (q >> 1) ? row_mid : mi_row;
        const int q_col = (q & 1) ? col_mid : mi_col;
        encode_sb(cpi, td, tile_data, tp, q_row, q_col, dry_run, subsize,
                  pc_tree->split[q], rate);
      }
      break;

    case PARTITION_HORZ_A:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, quad_bsize,
               partition, &pc_tree->horizontala[0], rate);
      encode_b(cpi, tile_data, td, tp, mi_row, col_mid, dry_run, quad_bsize,
               partition, &pc_tree->horizontala[1], rate);
      encode_b(cpi, tile_data, td, tp, row_mid, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontala[2], rate);
      break;

    case PARTITION_HORZ_B:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->horizontalb[0], rate);
      encode_b(cpi, tile_data, td, tp, row_mid, mi_col, dry_run, quad_bsize,
               partition, &pc_tree->horizontalb[1], rate);
      encode_b(cpi, tile_data, td, tp, row_mid, col_mid, dry_run, quad_bsize,
               partition, &pc_tree->horizontalb[2], rate);
      break;

    case PARTITION_VERT_A:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, quad_bsize,
               partition, &pc_tree->verticala[0], rate);
      encode_b(cpi, tile_data, td, tp, row_mid, mi_col, dry_run, quad_bsize,
               partition, &pc_tree->verticala[1], rate);
      encode_b(cpi, tile_data, td, tp, mi_row, col_mid, dry_run, subsize,
               partition, &pc_tree->verticala[2], rate);
      break;

    case PARTITION_VERT_B:
      encode_b(cpi, tile_data, td, tp, mi_row, mi_col, dry_run, subsize,
               partition, &pc_tree->verticalb[0], rate);
      encode_b(cpi, tile_data, td, tp, mi_row, col_mid, dry_run, quad_bsize,
               partition, &pc_tree->verticalb[1], rate);
      encode_b(cpi, tile_data, td, tp, row_mid, col_mid, dry_run, quad_bsize,
               partition, &pc_tree->verticalb[2], rate);
      break;

    case PARTITION_HORZ_4:
      // Four stacked strips; the first is always coded, later ones only
      // while they still start inside the frame.
      for (int strip = 0; strip < 4; ++strip) {
        const int strip_row = mi_row + strip * quarter;
        if (strip > 0 && strip_row >= cm->mi_rows) break;

        encode_b(cpi, tile_data, td, tp, strip_row, mi_col, dry_run, subsize,
                 partition, &pc_tree->horizontal4[strip], rate);
      }
      break;

    case PARTITION_VERT_4:
      // Four side-by-side strips; same frame-boundary rule as above.
      for (int strip = 0; strip < 4; ++strip) {
        const int strip_col = mi_col + strip * quarter;
        if (strip > 0 && strip_col >= cm->mi_cols) break;

        encode_b(cpi, tile_data, td, tp, mi_row, strip_col, dry_run, subsize,
                 partition, &pc_tree->vertical4[strip], rate);
      }
      break;

    default: assert(0 && "Invalid partition type."); break;
  }

  update_ext_partition_context(xd, mi_row, mi_col, subsize, bsize, partition);
}
1636 
// Fills in the mode-info pointers and block sizes of a superblock that is
// only partly covered by the remaining rows/columns.
//
// The superblock is walked starting with steps of bh_in rows / bw_in columns.
// For every visited position, mib[] is pointed at the matching entry of `mi`
// and its sb_type is set to whatever find_partition_size() returns for the
// rows/columns still remaining from that position. find_partition_size() also
// receives the step variables by pointer, so it may change the step used for
// the following positions (the column step restarts at bw_in on every row).
static void set_partial_sb_partition(const AV1_COMMON *const cm,
                                     MB_MODE_INFO *mi, int bh_in, int bw_in,
                                     int mi_rows_remaining,
                                     int mi_cols_remaining, BLOCK_SIZE bsize,
                                     MB_MODE_INFO **mib) {
  const int sb_mi_size = cm->seq_params.mib_size;
  int row_step = bh_in;
  int row = 0;

  while (row < sb_mi_size) {
    int col_step = bw_in;
    int col = 0;

    while (col < sb_mi_size) {
      const int offset = row * cm->mi_stride + col;
      MB_MODE_INFO *const entry = mi + offset;

      mib[offset] = entry;
      entry->sb_type =
          find_partition_size(bsize, mi_rows_remaining - row,
                              mi_cols_remaining - col, &row_step, &col_step);
      col += col_step;
    }
    row += row_step;
  }
}
1654 
1655 // This function attempts to set all mode info entries in a given superblock
1656 // to the same block partition size.
1657 // However, at the bottom and right borders of the image the requested size
1658 // may not be allowed in which case this code attempts to choose the largest
1659 // allowable partition.
set_fixed_partitioning(AV1_COMP * cpi,const TileInfo * const tile,MB_MODE_INFO ** mib,int mi_row,int mi_col,BLOCK_SIZE bsize)1660 static void set_fixed_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
1661                                    MB_MODE_INFO **mib, int mi_row, int mi_col,
1662                                    BLOCK_SIZE bsize) {
1663   AV1_COMMON *const cm = &cpi->common;
1664   const int mi_rows_remaining = tile->mi_row_end - mi_row;
1665   const int mi_cols_remaining = tile->mi_col_end - mi_col;
1666   int block_row, block_col;
1667   MB_MODE_INFO *const mi_upper_left = cm->mi + mi_row * cm->mi_stride + mi_col;
1668   int bh = mi_size_high[bsize];
1669   int bw = mi_size_wide[bsize];
1670 
1671   assert((mi_rows_remaining > 0) && (mi_cols_remaining > 0));
1672 
1673   // Apply the requested partition size to the SB if it is all "in image"
1674   if ((mi_cols_remaining >= cm->seq_params.mib_size) &&
1675       (mi_rows_remaining >= cm->seq_params.mib_size)) {
1676     for (block_row = 0; block_row < cm->seq_params.mib_size; block_row += bh) {
1677       for (block_col = 0; block_col < cm->seq_params.mib_size;
1678            block_col += bw) {
1679         int index = block_row * cm->mi_stride + block_col;
1680         mib[index] = mi_upper_left + index;
1681         mib[index]->sb_type = bsize;
1682       }
1683     }
1684   } else {
1685     // Else this is a partial SB.
1686     set_partial_sb_partition(cm, mi_upper_left, bh, bw, mi_rows_remaining,
1687                              mi_cols_remaining, bsize, mib);
1688   }
1689 }
1690 
// Evaluates the block at (mi_row, mi_col) using the partitioning that is
// already recorded in the mode info (as reported by get_partition()), and
// optionally compares it with two alternatives.
//
// Steps:
//  1. Save the entropy/partition/txfm contexts for the block.
//  2. If the speed features request it (SEARCH_PARTITION together with
//     adjust_partitioning_from_last_frame), cost PARTITION_NONE unless every
//     quadrant of a SPLIT is itself split further.
//  3. Cost the recorded partitioning (NONE / HORZ / VERT / SPLIT; SPLIT
//     recurses). Extended partition types are not supported here.
//  4. Under the same speed features, cost a one-level SPLIT whose quadrants
//     are all coded as PARTITION_NONE.
//  5. Keep the cheapest candidate in pc_tree->partitioning, restore the saved
//     contexts and, when do_recon is non-zero, encode the block with the
//     chosen partitioning (real output at superblock size, dry run below it).
//
// cpi, td, tile_data: encoder, thread and tile state.
// mib:      mode-info pointer array positioned at the block's top-left entry.
// tp:       token output pointer, forwarded to the encode calls.
// bsize:    block size being evaluated; must be square.
// rate/dist: receive the rate and distortion of the chosen candidate. They
//           are left untouched when the block starts outside the frame.
// do_recon: whether to encode the block once the choice has been made.
// pc_tree:  partition tree node that holds the per-shape mode contexts.
static void rd_use_partition(AV1_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MB_MODE_INFO **mib,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bs = mi_size_wide[bsize];
  const int hbs = bs / 2;
  int i;
  const int pl = (bsize >= BLOCK_8X8)
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
                     : 0;
  const PARTITION_TYPE partition =
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
                           : PARTITION_NONE;
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
  // last_part_rdc: cost of the recorded partitioning.
  // none_rdc:      cost of PARTITION_NONE (only filled in by the extra search).
  // chosen_rdc:    cost of the one-level split trial, then of the winner.
  RD_STATS last_part_rdc, none_rdc, chosen_rdc;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  BLOCK_SIZE bs_type = mib[0]->sb_type;
  int do_partition_search = 1;
  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  assert(mi_size_wide[bsize] == mi_size_high[bsize]);

  av1_invalid_rd_stats(&last_part_rdc);
  av1_invalid_rd_stats(&none_rdc);
  av1_invalid_rd_stats(&chosen_rdc);

  pc_tree->partitioning = partition;

  // Point the txfm contexts at this block before taking the snapshot, since
  // save_context() records the pointers as well as the data behind them.
  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);

  if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
  }

  if (do_partition_search &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_partition_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MB_MODE_INFO *this_mi = mib[jj * hbs * cm->mi_stride + ii * hbs];
        if (this_mi && this_mi->sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If partition is not none try none unless each of the 4 splits are split
    // even further..
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + hbs < cm->mi_rows && mi_col + hbs < cm->mi_cols) {
      pc_tree->partitioning = PARTITION_NONE;
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc,
                    PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);

      if (none_rdc.rate < INT_MAX) {
        none_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
        none_rdc.rdcost = RDCOST(x->rdmult, none_rdc.rate, none_rdc.dist);
      }

      // Undo the trial: contexts, the block size recorded in the mode info
      // and the partitioning stored in the tree.
      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
      mib[0]->sb_type = bs_type;
      pc_tree->partitioning = partition;
    }
  }

  switch (partition) {
    case PARTITION_NONE:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_NONE, bsize, ctx_none, INT64_MAX, 0);
      break;
    case PARTITION_HORZ:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
                    0);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_row + hbs < cm->mi_rows) {
        RD_STATS tmp_rdc;
        const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
        av1_init_rd_stats(&tmp_rdc);
        // Commit and dry-run encode the first half so that the second half
        // is searched with the first half's state in place.
        update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
                          mi_col, subsize, NULL);
        pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
                      PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
                      INT64_MAX, 0);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_VERT:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
                    0);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_col + hbs < cm->mi_cols) {
        RD_STATS tmp_rdc;
        const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
        av1_init_rd_stats(&tmp_rdc);
        // Commit and dry-run encode the first half so that the second half
        // is searched with the first half's state in place.
        update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
                          mi_col, subsize, NULL);
        // The second half always gets its own context, vertical[1]:
        // encode_sb() encodes the two halves from vertical[0] and
        // vertical[1], so storing the second half in vertical[0] (as was done
        // for BLOCK_8X8) would overwrite the first half's decision and leave
        // vertical[1] stale. This also matches the PARTITION_HORZ case.
        pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
                      PARTITION_VERT, subsize, &pc_tree->vertical[1],
                      INT64_MAX, 0);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_SPLIT:
      last_part_rdc.rate = 0;
      last_part_rdc.dist = 0;
      last_part_rdc.rdcost = 0;
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * hbs;
        int y_idx = (i >> 1) * hbs;
        int jj = i >> 1, ii = i & 0x01;
        RD_STATS tmp_rdc;
        // Quadrants that start outside the frame contribute nothing.
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        av1_init_rd_stats(&tmp_rdc);
        // Every quadrant but the last is reconstructed (do_recon = i != 3).
        rd_use_partition(cpi, td, tile_data,
                         mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
                         mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
                         &tmp_rdc.dist, i != 3, pc_tree->split[i]);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
      }
      break;
    case PARTITION_VERT_A:
    case PARTITION_VERT_B:
    case PARTITION_HORZ_A:
    case PARTITION_HORZ_B:
    case PARTITION_HORZ_4:
    case PARTITION_VERT_4:
      assert(0 && "Cannot handle extended partition types");
    default: assert(0); break;
  }

  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += x->partition_cost[pl][partition];
    last_part_rdc.rdcost =
        RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
  }

  if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
      (mi_row + bs < cm->mi_rows || mi_row + hbs == cm->mi_rows) &&
      (mi_col + bs < cm->mi_cols || mi_col + hbs == cm->mi_cols)) {
    BLOCK_SIZE split_subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    chosen_rdc.rate = 0;
    chosen_rdc.dist = 0;

    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
    pc_tree->partitioning = PARTITION_SPLIT;

    // Split partition.
    for (i = 0; i < 4; i++) {
      int x_idx = (i & 1) * hbs;
      int y_idx = (i >> 1) * hbs;
      RD_STATS tmp_rdc;

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
      pc_tree->split[i]->partitioning = PARTITION_NONE;
      pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
                    PARTITION_SPLIT, split_subsize, &pc_tree->split[i]->none,
                    INT64_MAX, 0);

      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
        av1_invalid_rd_stats(&chosen_rdc);
        break;
      }

      chosen_rdc.rate += tmp_rdc.rate;
      chosen_rdc.dist += tmp_rdc.dist;

      // This is only a trial, so it must be a dry run: with OUTPUT_ENABLED,
      // encode_b() would advance cb_offset and update the coding statistics
      // (and encode_sb() the partition CDFs) for a candidate that may be
      // discarded and is encoded again below when do_recon is set.
      if (i != 3)
        encode_sb(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
                  DRY_RUN_NORMAL, split_subsize, pc_tree->split[i], NULL);

      chosen_rdc.rate += x->partition_cost[pl][PARTITION_NONE];
    }
    if (chosen_rdc.rate < INT_MAX) {
      chosen_rdc.rate += x->partition_cost[pl][PARTITION_SPLIT];
      chosen_rdc.rdcost = RDCOST(x->rdmult, chosen_rdc.rate, chosen_rdc.dist);
    }
  }

  // If last_part is better set the partitioning to that.
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
    mib[0]->sb_type = bsize;
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition;
    chosen_rdc = last_part_rdc;
  }
  // If none was better set the partitioning to that.
  if (none_rdc.rdcost < chosen_rdc.rdcost) {
    if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE;
    chosen_rdc = none_rdc;
  }

  restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if (bsize == cm->seq_params.sb_size)
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);

  if (do_recon) {
    if (bsize == cm->seq_params.sb_size) {
      // NOTE: To get estimate for rate due to the tokens, use:
      // int rate_coeffs = 0;
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
      //           bsize, pc_tree, &rate_coeffs);
      x->cb_offset = 0;
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
                pc_tree, NULL);
    } else {
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
                pc_tree, NULL);
    }
  }

  *rate = chosen_rdc.rate;
  *dist = chosen_rdc.dist;
}
1953 
// TODO(kyslov): This is now very similar to rd_use_partition (except that it
// doesn't do the extra search around the suggested partitioning). Consider
// passing a flag to select the non-rd path (similar to encode_sb_row).
// Evaluates the block at (mi_row, mi_col) using exactly the partitioning that
// is already recorded in the mode info (as reported by get_partition()); no
// alternative partitionings are tried.
//
// The block's contexts are saved, the recorded partitioning is costed
// (NONE / HORZ / VERT directly, SPLIT by recursion; extended partition types
// are not supported), the contexts are restored and, when do_recon is
// non-zero, the block is encoded (real output at superblock size, dry run
// below it). pick_sb_modes() is called with its last argument set to 1 here,
// whereas rd_use_partition() passes 0.
//
// cpi, td, tile_data: encoder, thread and tile state.
// mib:      mode-info pointer array positioned at the block's top-left entry.
// tp:       token output pointer, forwarded to the encode calls.
// bsize:    block size being evaluated; must be square.
// rate/dist: receive the rate and distortion of the recorded partitioning.
//           They are left untouched when the block starts outside the frame.
// do_recon: whether to encode the block after costing it.
// pc_tree:  partition tree node that holds the per-shape mode contexts.
static void nonrd_use_partition(AV1_COMP *cpi, ThreadData *td,
                                TileDataEnc *tile_data, MB_MODE_INFO **mib,
                                TOKENEXTRA **tp, int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int *rate, int64_t *dist,
                                int do_recon, PC_TREE *pc_tree) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bs = mi_size_wide[bsize];
  const int hbs = bs / 2;
  int i;
  const int pl = (bsize >= BLOCK_8X8)
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
                     : 0;
  const PARTITION_TYPE partition =
      (bsize >= BLOCK_8X8) ? get_partition(cm, mi_row, mi_col, bsize)
                           : PARTITION_NONE;
  const BLOCK_SIZE subsize = get_partition_subsize(bsize, partition);
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
  RD_STATS last_part_rdc;
  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  assert(mi_size_wide[bsize] == mi_size_high[bsize]);

  av1_invalid_rd_stats(&last_part_rdc);

  pc_tree->partitioning = partition;

  // Point the txfm contexts at this block before taking the snapshot, since
  // save_context() records the pointers as well as the data behind them.
  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);

  if (bsize == BLOCK_16X16 && cpi->vaq_refresh) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = av1_log_block_var(cpi, x, bsize);
  }

  switch (partition) {
    case PARTITION_NONE:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_NONE, bsize, ctx_none, INT64_MAX, 1);
      break;
    case PARTITION_HORZ:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_HORZ, subsize, &pc_tree->horizontal[0], INT64_MAX,
                    1);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_row + hbs < cm->mi_rows) {
        RD_STATS tmp_rdc;
        const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
        av1_init_rd_stats(&tmp_rdc);
        // Commit and dry-run encode the first half so that the second half
        // is searched with the first half's state in place.
        update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
                          mi_col, subsize, NULL);
        pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &tmp_rdc,
                      PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
                      INT64_MAX, 1);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_VERT:
      pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                    PARTITION_VERT, subsize, &pc_tree->vertical[0], INT64_MAX,
                    1);
      if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 &&
          mi_col + hbs < cm->mi_cols) {
        RD_STATS tmp_rdc;
        const PICK_MODE_CONTEXT *const ctx_v = &pc_tree->vertical[0];
        av1_init_rd_stats(&tmp_rdc);
        // Commit and dry-run encode the first half so that the second half
        // is searched with the first half's state in place.
        update_state(cpi, tile_data, td, ctx_v, mi_row, mi_col, subsize, 1);
        encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row,
                          mi_col, subsize, NULL);
        // The second half always gets its own context, vertical[1]:
        // encode_sb() encodes the two halves from vertical[0] and
        // vertical[1], so storing the second half in vertical[0] (as was done
        // for BLOCK_8X8) would overwrite the first half's decision and leave
        // vertical[1] stale. This also matches the PARTITION_HORZ case.
        pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &tmp_rdc,
                      PARTITION_VERT, subsize, &pc_tree->vertical[1],
                      INT64_MAX, 1);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
        last_part_rdc.rdcost += tmp_rdc.rdcost;
      }
      break;
    case PARTITION_SPLIT:
      last_part_rdc.rate = 0;
      last_part_rdc.dist = 0;
      last_part_rdc.rdcost = 0;
      for (i = 0; i < 4; i++) {
        int x_idx = (i & 1) * hbs;
        int y_idx = (i >> 1) * hbs;
        int jj = i >> 1, ii = i & 0x01;
        RD_STATS tmp_rdc;
        // Quadrants that start outside the frame contribute nothing.
        if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
          continue;

        av1_init_rd_stats(&tmp_rdc);
        // Every quadrant but the last is reconstructed (do_recon = i != 3).
        nonrd_use_partition(
            cpi, td, tile_data, mib + jj * hbs * cm->mi_stride + ii * hbs, tp,
            mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate,
            &tmp_rdc.dist, i != 3, pc_tree->split[i]);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          av1_invalid_rd_stats(&last_part_rdc);
          break;
        }
        last_part_rdc.rate += tmp_rdc.rate;
        last_part_rdc.dist += tmp_rdc.dist;
      }
      break;
    case PARTITION_VERT_A:
    case PARTITION_VERT_B:
    case PARTITION_HORZ_A:
    case PARTITION_HORZ_B:
    case PARTITION_HORZ_4:
    case PARTITION_VERT_4:
      assert(0 && "Cannot handle extended partition types");
    default: assert(0); break;
  }

  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += x->partition_cost[pl][partition];
    last_part_rdc.rdcost =
        RDCOST(x->rdmult, last_part_rdc.rate, last_part_rdc.dist);
  }

  restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if (bsize == cm->seq_params.sb_size)
    assert(last_part_rdc.rate < INT_MAX && last_part_rdc.dist < INT64_MAX);

  if (do_recon) {
    if (bsize == cm->seq_params.sb_size) {
      // NOTE: To get estimate for rate due to the tokens, use:
      // int rate_coeffs = 0;
      // encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_COSTCOEFFS,
      //           bsize, pc_tree, &rate_coeffs);
      x->cb_offset = 0;
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
                pc_tree, NULL);
    } else {
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
                pc_tree, NULL);
    }
  }

  *rate = last_part_rdc.rate;
  *dist = last_part_rdc.dist;
}
2119 
2120 // Checks to see if a super block is on a horizontal image edge.
2121 // In most cases this is the "real" edge unless there are formatting
2122 // bars embedded in the stream.
active_h_edge(const AV1_COMP * cpi,int mi_row,int mi_step)2123 static int active_h_edge(const AV1_COMP *cpi, int mi_row, int mi_step) {
2124   int top_edge = 0;
2125   int bottom_edge = cpi->common.mi_rows;
2126   int is_active_h_edge = 0;
2127 
2128   // For two pass account for any formatting bars detected.
2129   if (cpi->oxcf.pass == 2) {
2130     const TWO_PASS *const twopass = &cpi->twopass;
2131 
2132     // The inactive region is specified in MBs not mi units.
2133     // The image edge is in the following MB row.
2134     top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2135 
2136     bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2);
2137     bottom_edge = AOMMAX(top_edge, bottom_edge);
2138   }
2139 
2140   if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) ||
2141       ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) {
2142     is_active_h_edge = 1;
2143   }
2144   return is_active_h_edge;
2145 }
2146 
2147 // Checks to see if a super block is on a vertical image edge.
2148 // In most cases this is the "real" edge unless there are formatting
2149 // bars embedded in the stream.
active_v_edge(const AV1_COMP * cpi,int mi_col,int mi_step)2150 static int active_v_edge(const AV1_COMP *cpi, int mi_col, int mi_step) {
2151   int left_edge = 0;
2152   int right_edge = cpi->common.mi_cols;
2153   int is_active_v_edge = 0;
2154 
2155   // For two pass account for any formatting bars detected.
2156   if (cpi->oxcf.pass == 2) {
2157     const TWO_PASS *const twopass = &cpi->twopass;
2158 
2159     // The inactive region is specified in MBs not mi units.
2160     // The image edge is in the following MB row.
2161     left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2162 
2163     right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2);
2164     right_edge = AOMMAX(left_edge, right_edge);
2165   }
2166 
2167   if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) ||
2168       ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) {
2169     is_active_v_edge = 1;
2170   }
2171   return is_active_v_edge;
2172 }
2173 
store_pred_mv(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx)2174 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2175   memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2176 }
2177 
load_pred_mv(MACROBLOCK * x,const PICK_MODE_CONTEXT * const ctx)2178 static INLINE void load_pred_mv(MACROBLOCK *x,
2179                                 const PICK_MODE_CONTEXT *const ctx) {
2180   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2181 }
2182 
2183 // Try searching for an encoding for the given subblock. Returns zero if the
2184 // rdcost is already too high (to tell the caller not to bother searching for
2185 // encodings of further subblocks)
rd_try_subblock(AV1_COMP * const cpi,ThreadData * td,TileDataEnc * tile_data,TOKENEXTRA ** tp,int is_last,int mi_row,int mi_col,BLOCK_SIZE subsize,RD_STATS * best_rdc,RD_STATS * sum_rdc,RD_STATS * this_rdc,PARTITION_TYPE partition,PICK_MODE_CONTEXT * prev_ctx,PICK_MODE_CONTEXT * this_ctx)2186 static int rd_try_subblock(AV1_COMP *const cpi, ThreadData *td,
2187                            TileDataEnc *tile_data, TOKENEXTRA **tp, int is_last,
2188                            int mi_row, int mi_col, BLOCK_SIZE subsize,
2189                            RD_STATS *best_rdc, RD_STATS *sum_rdc,
2190                            RD_STATS *this_rdc, PARTITION_TYPE partition,
2191                            PICK_MODE_CONTEXT *prev_ctx,
2192                            PICK_MODE_CONTEXT *this_ctx) {
2193 #define RTS_X_RATE_NOCOEF_ARG
2194 #define RTS_MAX_RDCOST best_rdc->rdcost
2195 
2196   MACROBLOCK *const x = &td->mb;
2197 
2198   if (cpi->sf.adaptive_motion_search) load_pred_mv(x, prev_ctx);
2199 
2200   const int64_t rdcost_remaining = best_rdc->rdcost == INT64_MAX
2201                                        ? INT64_MAX
2202                                        : (best_rdc->rdcost - sum_rdc->rdcost);
2203 
2204   pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, this_rdc,
2205                 RTS_X_RATE_NOCOEF_ARG partition, subsize, this_ctx,
2206                 rdcost_remaining, 0);
2207 
2208   if (this_rdc->rate == INT_MAX) {
2209     sum_rdc->rdcost = INT64_MAX;
2210   } else {
2211     sum_rdc->rate += this_rdc->rate;
2212     sum_rdc->dist += this_rdc->dist;
2213     sum_rdc->rdcost += this_rdc->rdcost;
2214   }
2215 
2216   if (sum_rdc->rdcost >= RTS_MAX_RDCOST) return 0;
2217 
2218   if (!is_last) {
2219     update_state(cpi, tile_data, td, this_ctx, mi_row, mi_col, subsize, 1);
2220     encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
2221                       subsize, NULL);
2222   }
2223 
2224   return 1;
2225 
2226 #undef RTS_X_RATE_NOCOEF_ARG
2227 #undef RTS_MAX_RDCOST
2228 }
2229 
// Evaluates a three-sub-block partition candidate of the block at
// (mi_row, mi_col) with size bsize.
//
// The running cost starts with the signaling cost of `partition`; the three
// sub-blocks (position/size triplets 0, 1 and 2) are then searched in order
// with rd_try_subblock, each one seeded from the mode context of the previous
// one (`ctx` for the first). All three sub-blocks are always tried, and the
// search stops as soon as rd_try_subblock reports that the candidate can no
// longer beat *best_rdc. On success *best_rdc is replaced by the candidate's
// stats and pc_tree->partitioning is set to `partition`.
static void rd_test_partition3(AV1_COMP *const cpi, ThreadData *td,
                               TileDataEnc *tile_data, TOKENEXTRA **tp,
                               PC_TREE *pc_tree, RD_STATS *best_rdc,
                               PICK_MODE_CONTEXT ctxs[3],
                               PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col,
                               BLOCK_SIZE bsize, PARTITION_TYPE partition,
                               int mi_row0, int mi_col0, BLOCK_SIZE subsize0,
                               int mi_row1, int mi_col1, BLOCK_SIZE subsize1,
                               int mi_row2, int mi_col2, BLOCK_SIZE subsize2) {
  MACROBLOCK *const mb = &td->mb;
  MACROBLOCKD *const mbd = &mb->e_mbd;
  RD_STATS sum_rdc, this_rdc;

  const int part_ctx = partition_plane_context(mbd, mi_row, mi_col, bsize);
  av1_init_rd_stats(&sum_rdc);
  sum_rdc.rate = mb->partition_cost[part_ctx][partition];
  sum_rdc.rdcost = RDCOST(mb->rdmult, sum_rdc.rate, 0);

  const int sub_row[3] = { mi_row0, mi_row1, mi_row2 };
  const int sub_col[3] = { mi_col0, mi_col1, mi_col2 };
  const BLOCK_SIZE sub_bsize[3] = { subsize0, subsize1, subsize2 };

  PICK_MODE_CONTEXT *seed_ctx = ctx;
  for (int blk = 0; blk < 3; ++blk) {
    // Only the third sub-block is flagged as the last one.
    const int is_last = (blk == 2);
    if (!rd_try_subblock(cpi, td, tile_data, tp, is_last, sub_row[blk],
                         sub_col[blk], sub_bsize[blk], best_rdc, &sum_rdc,
                         &this_rdc, partition, seed_ctx, &ctxs[blk])) {
      return;
    }
    seed_ctx = &ctxs[blk];
  }

  if (sum_rdc.rdcost >= best_rdc->rdcost) return;

  // Recompute the cost from the accumulated rate and distortion before the
  // final comparison.
  sum_rdc.rdcost = RDCOST(mb->rdmult, sum_rdc.rate, sum_rdc.dist);

  if (sum_rdc.rdcost >= best_rdc->rdcost) return;

  *best_rdc = sum_rdc;
  pc_tree->partitioning = partition;
}
2280 
// Recursively resets a partition tree node and all of its split children:
// the partitioning goes back to PARTITION_NONE, the search range to
// SEARCH_FULL_PLANE, the none-context skip flag to 0 and the collected
// pc_tree_stats to their "nothing recorded" values. Recursion descends into
// the four split children for every node of size BLOCK_8X8 or larger.
static void reset_partition(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
  pc_tree->partitioning = PARTITION_NONE;
  pc_tree->cb_search_range = SEARCH_FULL_PLANE;
  pc_tree->none.skip = 0;

  // Whole-block statistics.
  pc_tree->pc_tree_stats.valid = 0;
  pc_tree->pc_tree_stats.split = 0;
  pc_tree->pc_tree_stats.skip = 0;
  pc_tree->pc_tree_stats.rdcost = INT64_MAX;

  // Per-quadrant statistics.
  for (int quad = 0; quad < 4; ++quad) {
    pc_tree->pc_tree_stats.sub_block_split[quad] = 0;
    pc_tree->pc_tree_stats.sub_block_skip[quad] = 0;
    pc_tree->pc_tree_stats.sub_block_rdcost[quad] = INT64_MAX;
  }

  // Blocks below 8x8 are not descended into.
  if (bsize < BLOCK_8X8) return;

  const BLOCK_SIZE child_bsize = get_partition_subsize(bsize, PARTITION_SPLIT);
  for (int quad = 0; quad < 4; ++quad) {
    reset_partition(pc_tree->split[quad], child_bsize);
  }
}
2303 
// Recursive RD search restricted to square partitions: for the block at
// (mi_row, mi_col) of size bsize only PARTITION_NONE and PARTITION_SPLIT are
// evaluated, and PARTITION_SPLIT recurses into this same function for each of
// the four quadrants.
//
// Inputs:
//   best_rd  - RD cost a candidate must be strictly below to be accepted. A
//              negative value aborts the search immediately.
//   tp       - token output pointer; for blocks smaller than the superblock
//              it is asserted to be unchanged on return.
// Outputs:
//   *rd_cost - a copy of the best stats found (best_rdc). On the early abort
//              path it is set via av1_invalid_rd_stats instead.
//   *none_rd - if non-NULL: the rdcost returned by pick_sb_modes for
//              PARTITION_NONE (before the partition signaling cost is added),
//              or 0 if PARTITION_NONE was not evaluated.
//   pc_tree  - partitioning, cb_search_range and pc_tree_stats are updated
//              with the decision and the statistics gathered on the way.
static void rd_pick_sqr_partition(AV1_COMP *const cpi, ThreadData *td,
                                  TileDataEnc *tile_data, TOKENEXTRA **tp,
                                  int mi_row, int mi_col, BLOCK_SIZE bsize,
                                  RD_STATS *rd_cost, int64_t best_rd,
                                  PC_TREE *pc_tree, int64_t *none_rd) {
  const AV1_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Half the block width in mi units: the offset of the second quadrant in
  // each direction.
  const int mi_step = mi_size_wide[bsize] / 2;
  RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
  // Token pointer on entry; only consulted by the assert at the end.
  const TOKENEXTRA *const tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
  int tmp_partition_cost[PARTITION_TYPES];
  BLOCK_SIZE subsize;
  RD_STATS this_rdc, sum_rdc, best_rdc, pn_rdc;
  const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
  // Splitting is only considered for blocks of at least 8x8.
  int do_square_split = bsize_at_least_8x8;
  const int pl = bsize_at_least_8x8
                     ? partition_plane_context(xd, mi_row, mi_col, bsize)
                     : 0;
  // Partition signaling costs for this context; replaced by
  // tmp_partition_cost below for blocks crossing the frame edge.
  const int *partition_cost =
      pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
  const int num_planes = av1_num_planes(cm);

  // Receives the PARTITION_NONE rdcost of each quadrant from the recursive
  // calls (passed as their none_rd argument); not read in this function.
  int64_t split_rd[4] = { 0, 0, 0, 0 };

  // Override skipping rectangular partition operations for edge blocks
  // has_rows / has_cols: the bottom / right half of the block starts inside
  // the frame.
  const int has_rows = (mi_row + mi_step < cm->mi_rows);
  const int has_cols = (mi_col + mi_step < cm->mi_cols);

  if (none_rd) *none_rd = 0;

  // PARTITION_NONE is only tried when the block does not cross the bottom or
  // right frame edge.
  int partition_none_allowed = has_rows && has_cols;

  // Silence unused-variable warnings: tp_orig is only read by an assert and
  // split_rd is only written through by the recursive calls.
  (void)*tp_orig;
  (void)split_rd;

  // No budget left: report an invalid result without searching.
  if (best_rd < 0) {
    pc_tree->none.rdcost = INT64_MAX;
    pc_tree->none.skip = 0;
    av1_invalid_rd_stats(rd_cost);
    return;
  }
  pc_tree->pc_tree_stats.valid = 1;

  // Override partition costs at the edges of the frame in the same
  // way as in read_partition (see decodeframe.c)
  if (!(has_rows && has_cols)) {
    assert(bsize_at_least_8x8 && pl >= 0);
    const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
    // Every partition type is unavailable unless given a cost below.
    for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
    if (has_cols) {
      // At the bottom, the two possibilities are HORZ and SPLIT
      aom_cdf_prob bot_cdf[2];
      partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
      static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
      av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
    } else if (has_rows) {
      // At the right, the two possibilities are VERT and SPLIT
      aom_cdf_prob rhs_cdf[2];
      partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
      static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
      av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
    } else {
      // At the bottom right, we always split
      tmp_partition_cost[PARTITION_SPLIT] = 0;
    }

    partition_cost = tmp_partition_cost;
  }

#ifndef NDEBUG
  // Nothing should rely on the default value of this array (which is just
  // leftover from encoding the previous block). Setting it to fixed pattern
  // when debugging.
  // bit 0, 1, 2 are blk_skip of each plane
  // bit 4, 5, 6 are initialization checking of each plane
  memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
#endif  // NDEBUG

  // This search only handles square blocks.
  assert(mi_size_wide[bsize] == mi_size_high[bsize]);

  av1_init_rd_stats(&this_rdc);
  av1_init_rd_stats(&sum_rdc);
  // best_rdc starts out invalid, carrying only the caller's RD budget.
  av1_invalid_rd_stats(&best_rdc);
  best_rdc.rdcost = best_rd;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
    x->mb_energy = av1_log_block_var(cpi, x, bsize);

  xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
  // Snapshot the coding context so each candidate starts from the same state.
  save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);

#if CONFIG_DIST_8X8
  // With dist_8x8 enabled, blocks with a side of 8 or less are not split.
  if (x->using_dist_8x8) {
    if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
      do_square_split = 0;
  }
#endif

  // PARTITION_NONE
  if (partition_none_allowed) {
    int pt_cost = 0;
    if (bsize_at_least_8x8) {
      pc_tree->partitioning = PARTITION_NONE;
      // An unavailable (INT_MAX) signaling cost is treated as free.
      pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
                    ? partition_cost[PARTITION_NONE]
                    : 0;
    }
    // Budget for the mode search: what remains of the best cost after paying
    // for the partition signaling.
    const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
    const int64_t best_remain_rdcost =
        best_rdc.rdcost == INT64_MAX ? INT64_MAX
                                     : (best_rdc.rdcost - partition_rd_cost);
    pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
                  bsize, ctx_none, best_remain_rdcost, 0);

    pc_tree->pc_tree_stats.rdcost = ctx_none->rdcost;
    pc_tree->pc_tree_stats.skip = ctx_none->skip;

    // Reported before the partition signaling cost is added below.
    if (none_rd) *none_rd = this_rdc.rdcost;
    if (this_rdc.rate != INT_MAX) {
      if (bsize_at_least_8x8) {
        this_rdc.rate += pt_cost;
        this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
      }

      if (this_rdc.rdcost < best_rdc.rdcost) {
        // Adjust dist breakout threshold according to the partition size.
        const int64_t dist_breakout_thr =
            cpi->sf.partition_search_breakout_dist_thr >>
            ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
             (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
        const int rate_breakout_thr =
            cpi->sf.partition_search_breakout_rate_thr *
            num_pels_log2_lookup[bsize];

        best_rdc = this_rdc;
        if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;

        pc_tree->cb_search_range = SEARCH_FULL_PLANE;

        // Early-termination checks below only apply to non-lossless blocks
        // whose PARTITION_NONE result is skippable.
        if (!x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
          const int use_ml_based_breakout =
              bsize <= cpi->sf.use_square_partition_only_threshold &&
              bsize > BLOCK_4X4 && xd->bd == 8;

          // TODO(anyone): Currently this is using the same model and threshold
          // values as in rd_pick_partition. Retraining the model and tuning the
          // threshold values might be helpful to improve the speed.
          if (use_ml_based_breakout) {
            if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
                                    x->source_variance)) {
              do_square_split = 0;
            }
          }

          // If all y, u, v transform blocks in this partition are skippable,
          // and the dist & rate are within the thresholds, the partition search
          // is terminated for current branch of the partition search tree. The
          // dist & rate thresholds are set to 0 at speed 0 to disable the early
          // termination at that speed.
          if (best_rdc.dist < dist_breakout_thr &&
              best_rdc.rate < rate_breakout_thr) {
            do_square_split = 0;
          }
        }

        // Optional early termination based on a simple motion search; it may
        // clear do_square_split. Only run for shown inter frames, 8x8..32x32
        // blocks fully inside the frame, with a valid PARTITION_NONE result.
        if (cpi->sf.firstpass_simple_motion_search_early_term &&
            cm->show_frame && bsize <= BLOCK_32X32 && bsize >= BLOCK_8X8 &&
            !frame_is_intra_only(cm) && mi_row + mi_step < cm->mi_rows &&
            mi_col + mi_step < cm->mi_cols && this_rdc.rdcost < INT64_MAX &&
            this_rdc.rdcost >= 0 && this_rdc.rate < INT_MAX &&
            this_rdc.rate >= 0 && do_square_split) {
          av1_firstpass_simple_motion_search_early_term(
              cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
              &do_square_split);
        }
      }
    }

    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
  }

  // store estimated motion vector
  if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);

  // Remember the state after PARTITION_NONE: temp_best_rdcost bounds the
  // split search and pn_rdc is compared against the split result below.
  int64_t temp_best_rdcost = best_rdc.rdcost;
  pn_rdc = best_rdc;

  // PARTITION_SPLIT
  if (do_square_split) {
    int reached_last_index = 0;
    subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
    int idx;

    sum_rdc.rate = partition_cost[PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);

    // Search the quadrants in raster order, stopping as soon as the running
    // cost reaches the best cost known before the split search.
    for (idx = 0; idx < 4 && sum_rdc.rdcost < temp_best_rdcost; ++idx) {
      const int x_idx = (idx & 1) * mi_step;
      const int y_idx = (idx >> 1) * mi_step;

      // Quadrants that start outside the frame are skipped.
      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;

      if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);

      pc_tree->split[idx]->index = idx;
      int64_t *p_split_rd = &split_rd[idx];
      const int64_t best_remain_rdcost =
          (temp_best_rdcost == INT64_MAX) ? INT64_MAX
                                          : (temp_best_rdcost - sum_rdc.rdcost);
      rd_pick_sqr_partition(
          cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize,
          &this_rdc, best_remain_rdcost, pc_tree->split[idx], p_split_rd);

      pc_tree->pc_tree_stats.sub_block_rdcost[idx] = this_rdc.rdcost;
      pc_tree->pc_tree_stats.sub_block_skip[idx] =
          pc_tree->split[idx]->none.skip;

      // A quadrant without a valid result invalidates the whole split.
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
        break;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }
    // idx == 4 only if the loop ran to completion (no break, no budget stop).
    reached_last_index = (idx == 4);

    if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
      // Recompute the cost from the accumulated rate and distortion.
      sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);

      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_SPLIT;
      }
    }

    // Derive cb_search_range from how the split and none candidates compare.
    int has_split = 0;
    if (pc_tree->partitioning == PARTITION_SPLIT) {
      // Count the quadrants that were themselves split.
      for (int cb_idx = 0; cb_idx <= AOMMIN(idx, 3); ++cb_idx) {
        if (pc_tree->split[cb_idx]->partitioning == PARTITION_SPLIT)
          ++has_split;
      }

      if (has_split >= 3 || sum_rdc.rdcost < (pn_rdc.rdcost >> 1)) {
        pc_tree->cb_search_range = SPLIT_PLANE;
      }
    }

    if (pc_tree->partitioning == PARTITION_NONE) {
      pc_tree->cb_search_range = SEARCH_SAME_PLANE;
      if (pn_rdc.dist <= sum_rdc.dist)
        pc_tree->cb_search_range = NONE_PARTITION_PLANE;
    }

    // No valid pre-split result overrides the range chosen above.
    if (pn_rdc.rate == INT_MAX) pc_tree->cb_search_range = NONE_PARTITION_PLANE;

    restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
  }  // if (do_square_split)

  // Record the final decision for this node and its four quadrants.
  pc_tree->pc_tree_stats.split = pc_tree->partitioning == PARTITION_SPLIT;
  if (do_square_split) {
    for (int i = 0; i < 4; ++i) {
      pc_tree->pc_tree_stats.sub_block_split[i] =
          pc_tree->split[i]->partitioning == PARTITION_SPLIT;
    }
  }

  // TODO(jbb): This code added so that we avoid static analysis
  // warning related to the fact that best_rd isn't used after this
  // point.  This code should be refactored so that the duplicate
  // checks occur in some sub function and thus are used...
  (void)best_rd;
  *rd_cost = best_rdc;

  // With a valid result, and unless this node is the fourth quadrant of its
  // parent (index 3): the superblock root only restores the saved context,
  // any other block is dry-run encoded.
  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
      pc_tree->index != 3) {
    if (bsize == cm->seq_params.sb_size) {
      restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
    } else {
      encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
                pc_tree, NULL);
    }
  }

  // The superblock root must end with a valid result; smaller blocks must
  // leave the token pointer where they found it.
  if (bsize == cm->seq_params.sb_size) {
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
2604 
2605 // split_score indicates confidence of picking split partition;
2606 // none_score indicates confidence of picking none partition;
2607 #define FEATURE_SIZE 19
ml_prune_2pass_split_partition(const PC_TREE_STATS * pc_tree_stats,BLOCK_SIZE bsize,int * split_score,int * none_score)2608 static int ml_prune_2pass_split_partition(const PC_TREE_STATS *pc_tree_stats,
2609                                           BLOCK_SIZE bsize, int *split_score,
2610                                           int *none_score) {
2611   if (!pc_tree_stats->valid) return 0;
2612   const float *split_weights = NULL;
2613   const float *none_weights = NULL;
2614   switch (bsize) {
2615     case BLOCK_4X4: break;
2616     case BLOCK_8X8:
2617       split_weights = av1_2pass_split_partition_weights_8;
2618       none_weights = av1_2pass_none_partition_weights_8;
2619       break;
2620     case BLOCK_16X16:
2621       split_weights = av1_2pass_split_partition_weights_16;
2622       none_weights = av1_2pass_none_partition_weights_16;
2623       break;
2624     case BLOCK_32X32:
2625       split_weights = av1_2pass_split_partition_weights_32;
2626       none_weights = av1_2pass_none_partition_weights_32;
2627       break;
2628     case BLOCK_64X64:
2629       split_weights = av1_2pass_split_partition_weights_64;
2630       none_weights = av1_2pass_none_partition_weights_64;
2631       break;
2632     case BLOCK_128X128:
2633       split_weights = av1_2pass_split_partition_weights_128;
2634       none_weights = av1_2pass_none_partition_weights_128;
2635       break;
2636     default: assert(0 && "Unexpected bsize.");
2637   }
2638   if (!split_weights || !none_weights) return 0;
2639 
2640   aom_clear_system_state();
2641 
2642   float features[FEATURE_SIZE];
2643   int feature_index = 0;
2644   features[feature_index++] = (float)pc_tree_stats->split;
2645   features[feature_index++] = (float)pc_tree_stats->skip;
2646   const int rdcost = (int)AOMMIN(INT_MAX, pc_tree_stats->rdcost);
2647   const int rd_valid = rdcost > 0 && rdcost < 1000000000;
2648   features[feature_index++] = (float)rd_valid;
2649   for (int i = 0; i < 4; ++i) {
2650     features[feature_index++] = (float)pc_tree_stats->sub_block_split[i];
2651     features[feature_index++] = (float)pc_tree_stats->sub_block_skip[i];
2652     const int sub_rdcost =
2653         (int)AOMMIN(INT_MAX, pc_tree_stats->sub_block_rdcost[i]);
2654     const int sub_rd_valid = sub_rdcost > 0 && sub_rdcost < 1000000000;
2655     features[feature_index++] = (float)sub_rd_valid;
2656     // Ratio between the sub-block RD and the whole-block RD.
2657     float rd_ratio = 1.0f;
2658     if (rd_valid && sub_rd_valid && sub_rdcost < rdcost)
2659       rd_ratio = (float)sub_rdcost / (float)rdcost;
2660     features[feature_index++] = rd_ratio;
2661   }
2662   assert(feature_index == FEATURE_SIZE);
2663 
2664   float score_1 = split_weights[FEATURE_SIZE];
2665   float score_2 = none_weights[FEATURE_SIZE];
2666   for (int i = 0; i < FEATURE_SIZE; ++i) {
2667     score_1 += features[i] * split_weights[i];
2668     score_2 += features[i] * none_weights[i];
2669   }
2670   *split_score = (int)(score_1 * 100);
2671   *none_score = (int)(score_2 * 100);
2672   return 1;
2673 }
2674 #undef FEATURE_SIZE
2675 
// Uses a per-block-size neural network to decide whether the horizontal
// and/or vertical rectangular partitions can be pruned.
//
// The model input has 9 features: the PARTITION_NONE RD cost and the four
// split RD costs, each relative to best_rd (1.0 when the cost is not in the
// range (0, 1000000000)), followed by the variance of each quadrant of the
// luma source block relative to the variance of the whole block.
//
// *dst_prune_horz / *dst_prune_vert are set to 1 when the corresponding
// softmax probability does not exceed the threshold for bsize; they are never
// cleared here. Nothing is done for blocks below 8x8, for
// best_rd >= 1000000000, or when no model exists for bsize.
static void ml_prune_rect_partition(const AV1_COMP *const cpi,
                                    const MACROBLOCK *const x, BLOCK_SIZE bsize,
                                    int64_t best_rd, int64_t none_rd,
                                    int64_t *split_rd,
                                    int *const dst_prune_horz,
                                    int *const dst_prune_vert) {
  if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
  // Keep the divisor of the RD ratios at least 1.
  best_rd = AOMMAX(best_rd, 1);

  // Model and pruning threshold per block size.
  const NN_CONFIG *model = NULL;
  float prune_thresh = 0.0f;
  switch (bsize) {
    case BLOCK_8X8:
      model = &av1_rect_partition_nnconfig_8;
      prune_thresh = 0.01f;
      break;
    case BLOCK_16X16:
      model = &av1_rect_partition_nnconfig_16;
      prune_thresh = 0.01f;
      break;
    case BLOCK_32X32:
      model = &av1_rect_partition_nnconfig_32;
      prune_thresh = 0.004f;
      break;
    case BLOCK_64X64:
      model = &av1_rect_partition_nnconfig_64;
      prune_thresh = 0.002f;
      break;
    case BLOCK_128X128:
      model = &av1_rect_partition_nnconfig_128;
      prune_thresh = 0.002f;
      break;
    default: assert(0 && "Unexpected bsize.");
  }
  if (model == NULL) return;
  aom_clear_system_state();

  // 1. Compute input features
  float features[9];

  // RD cost ratios: features[0] for none, features[1..4] for the quadrants.
  features[0] = (none_rd > 0 && none_rd < 1000000000)
                    ? (float)none_rd / (float)best_rd
                    : 1.0f;
  for (int q = 0; q < 4; ++q) {
    const int64_t quad_rd = split_rd[q];
    features[1 + q] = (quad_rd > 0 && quad_rd < 1000000000)
                          ? (float)quad_rd / (float)best_rd
                          : 1.0f;
  }

  // Variance ratios: features[5..8].
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int is_hbd = is_cur_buf_hbd(xd);

  int whole_var;
  if (is_hbd) {
    whole_var = av1_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
                                                   bsize, xd->bd);
  } else {
    whole_var = av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }
  // Keep the divisor of the variance ratios at least 1.
  whole_var = AOMMAX(whole_var, 1);

  const BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
  const int bw = block_size_wide[bsize];
  struct buf_2d quad_buf;
  quad_buf.stride = x->plane[0].src.stride;
  for (int q = 0; q < 4; ++q) {
    // Top-left sample of quadrant q inside the source block.
    const int x_off = (q & 1) * bw / 2;
    const int y_off = (q >> 1) * bw / 2;
    quad_buf.buf = x->plane[0].src.buf + x_off + y_off * quad_buf.stride;

    int quad_var;
    if (is_hbd) {
      quad_var =
          av1_high_get_sby_perpixel_variance(cpi, &quad_buf, subsize, xd->bd);
    } else {
      quad_var = av1_get_sby_perpixel_variance(cpi, &quad_buf, subsize);
    }
    features[5 + q] = (float)quad_var / (float)whole_var;
  }

  // 2. Do the prediction and prune 0-2 partitions based on their probabilities
  float raw_scores[3] = { 0.0f };
  av1_nn_predict(features, model, raw_scores);
  aom_clear_system_state();
  float probs[3] = { 0.0f };
  av1_nn_softmax(raw_scores, probs, 3);

  // probs[0] is the probability of the fact that both rectangular partitions
  // are worse than current best_rd; probs[1] and probs[2] gate the horizontal
  // and vertical partition respectively.
  if (probs[1] <= prune_thresh) *dst_prune_horz = 1;
  if (probs[2] <= prune_thresh) *dst_prune_vert = 1;
}
2769 
2770 // Use a ML model to predict if horz_a, horz_b, vert_a, and vert_b should be
2771 // considered.
ml_prune_ab_partition(BLOCK_SIZE bsize,int part_ctx,int var_ctx,int64_t best_rd,int64_t horz_rd[2],int64_t vert_rd[2],int64_t split_rd[4],int * const horza_partition_allowed,int * const horzb_partition_allowed,int * const verta_partition_allowed,int * const vertb_partition_allowed)2772 static void ml_prune_ab_partition(BLOCK_SIZE bsize, int part_ctx, int var_ctx,
2773                                   int64_t best_rd, int64_t horz_rd[2],
2774                                   int64_t vert_rd[2], int64_t split_rd[4],
2775                                   int *const horza_partition_allowed,
2776                                   int *const horzb_partition_allowed,
2777                                   int *const verta_partition_allowed,
2778                                   int *const vertb_partition_allowed) {
2779   if (bsize < BLOCK_8X8 || best_rd >= 1000000000) return;
2780   const NN_CONFIG *nn_config = NULL;
2781   switch (bsize) {
2782     case BLOCK_8X8: nn_config = NULL; break;
2783     case BLOCK_16X16: nn_config = &av1_ab_partition_nnconfig_16; break;
2784     case BLOCK_32X32: nn_config = &av1_ab_partition_nnconfig_32; break;
2785     case BLOCK_64X64: nn_config = &av1_ab_partition_nnconfig_64; break;
2786     case BLOCK_128X128: nn_config = &av1_ab_partition_nnconfig_128; break;
2787     default: assert(0 && "Unexpected bsize.");
2788   }
2789   if (!nn_config) return;
2790 
2791   aom_clear_system_state();
2792 
2793   // Generate features.
2794   float features[10];
2795   int feature_index = 0;
2796   features[feature_index++] = (float)part_ctx;
2797   features[feature_index++] = (float)var_ctx;
2798   const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
2799   int sub_block_rdcost[8] = { 0 };
2800   int rd_index = 0;
2801   for (int i = 0; i < 2; ++i) {
2802     if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
2803       sub_block_rdcost[rd_index] = (int)horz_rd[i];
2804     ++rd_index;
2805   }
2806   for (int i = 0; i < 2; ++i) {
2807     if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
2808       sub_block_rdcost[rd_index] = (int)vert_rd[i];
2809     ++rd_index;
2810   }
2811   for (int i = 0; i < 4; ++i) {
2812     if (split_rd[i] > 0 && split_rd[i] < 1000000000)
2813       sub_block_rdcost[rd_index] = (int)split_rd[i];
2814     ++rd_index;
2815   }
2816   for (int i = 0; i < 8; ++i) {
2817     // Ratio between the sub-block RD and the whole-block RD.
2818     float rd_ratio = 1.0f;
2819     if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
2820       rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
2821     features[feature_index++] = rd_ratio;
2822   }
2823   assert(feature_index == 10);
2824 
2825   // Calculate scores using the NN model.
2826   float score[16] = { 0.0f };
2827   av1_nn_predict(features, nn_config, score);
2828   aom_clear_system_state();
2829   int int_score[16];
2830   int max_score = -1000;
2831   for (int i = 0; i < 16; ++i) {
2832     int_score[i] = (int)(100 * score[i]);
2833     max_score = AOMMAX(int_score[i], max_score);
2834   }
2835 
2836   // Make decisions based on the model scores.
2837   int thresh = max_score;
2838   switch (bsize) {
2839     case BLOCK_16X16: thresh -= 150; break;
2840     case BLOCK_32X32: thresh -= 100; break;
2841     default: break;
2842   }
2843   *horza_partition_allowed = 0;
2844   *horzb_partition_allowed = 0;
2845   *verta_partition_allowed = 0;
2846   *vertb_partition_allowed = 0;
2847   for (int i = 0; i < 16; ++i) {
2848     if (int_score[i] >= thresh) {
2849       if ((i >> 0) & 1) *horza_partition_allowed = 1;
2850       if ((i >> 1) & 1) *horzb_partition_allowed = 1;
2851       if ((i >> 2) & 1) *verta_partition_allowed = 1;
2852       if ((i >> 3) & 1) *vertb_partition_allowed = 1;
2853     }
2854   }
2855 }
2856 
2857 #define FEATURES 18
2858 #define LABELS 4
2859 // Use a ML model to predict if horz4 and vert4 should be considered.
ml_prune_4_partition(const AV1_COMP * const cpi,MACROBLOCK * const x,BLOCK_SIZE bsize,int part_ctx,int64_t best_rd,int64_t horz_rd[2],int64_t vert_rd[2],int64_t split_rd[4],int * const partition_horz4_allowed,int * const partition_vert4_allowed,unsigned int pb_source_variance,int mi_row,int mi_col)2860 static void ml_prune_4_partition(const AV1_COMP *const cpi, MACROBLOCK *const x,
2861                                  BLOCK_SIZE bsize, int part_ctx,
2862                                  int64_t best_rd, int64_t horz_rd[2],
2863                                  int64_t vert_rd[2], int64_t split_rd[4],
2864                                  int *const partition_horz4_allowed,
2865                                  int *const partition_vert4_allowed,
2866                                  unsigned int pb_source_variance, int mi_row,
2867                                  int mi_col) {
2868   if (best_rd >= 1000000000) return;
2869   const NN_CONFIG *nn_config = NULL;
2870   switch (bsize) {
2871     case BLOCK_16X16: nn_config = &av1_4_partition_nnconfig_16; break;
2872     case BLOCK_32X32: nn_config = &av1_4_partition_nnconfig_32; break;
2873     case BLOCK_64X64: nn_config = &av1_4_partition_nnconfig_64; break;
2874     default: assert(0 && "Unexpected bsize.");
2875   }
2876   if (!nn_config) return;
2877 
2878   aom_clear_system_state();
2879 
2880   // Generate features.
2881   float features[FEATURES];
2882   int feature_index = 0;
2883   features[feature_index++] = (float)part_ctx;
2884   features[feature_index++] = (float)get_unsigned_bits(pb_source_variance);
2885 
2886   const int rdcost = (int)AOMMIN(INT_MAX, best_rd);
2887   int sub_block_rdcost[8] = { 0 };
2888   int rd_index = 0;
2889   for (int i = 0; i < 2; ++i) {
2890     if (horz_rd[i] > 0 && horz_rd[i] < 1000000000)
2891       sub_block_rdcost[rd_index] = (int)horz_rd[i];
2892     ++rd_index;
2893   }
2894   for (int i = 0; i < 2; ++i) {
2895     if (vert_rd[i] > 0 && vert_rd[i] < 1000000000)
2896       sub_block_rdcost[rd_index] = (int)vert_rd[i];
2897     ++rd_index;
2898   }
2899   for (int i = 0; i < 4; ++i) {
2900     if (split_rd[i] > 0 && split_rd[i] < 1000000000)
2901       sub_block_rdcost[rd_index] = (int)split_rd[i];
2902     ++rd_index;
2903   }
2904   for (int i = 0; i < 8; ++i) {
2905     // Ratio between the sub-block RD and the whole-block RD.
2906     float rd_ratio = 1.0f;
2907     if (sub_block_rdcost[i] > 0 && sub_block_rdcost[i] < rdcost)
2908       rd_ratio = (float)sub_block_rdcost[i] / (float)rdcost;
2909     features[feature_index++] = rd_ratio;
2910   }
2911 
2912   // Get variance of the 1:4 and 4:1 sub-blocks.
2913   unsigned int horz_4_source_var[4] = { 0 };
2914   unsigned int vert_4_source_var[4] = { 0 };
2915   {
2916     BLOCK_SIZE horz_4_bs = get_partition_subsize(bsize, PARTITION_HORZ_4);
2917     BLOCK_SIZE vert_4_bs = get_partition_subsize(bsize, PARTITION_VERT_4);
2918     av1_setup_src_planes(x, cpi->source, mi_row, mi_col,
2919                          av1_num_planes(&cpi->common), bsize);
2920     const int src_stride = x->plane[0].src.stride;
2921     const uint8_t *src = x->plane[0].src.buf;
2922     const MACROBLOCKD *const xd = &x->e_mbd;
2923     for (int i = 0; i < 4; ++i) {
2924       const uint8_t *horz_src =
2925           src + i * block_size_high[horz_4_bs] * src_stride;
2926       const uint8_t *vert_src = src + i * block_size_wide[vert_4_bs];
2927       unsigned int horz_var, vert_var, sse;
2928       if (is_cur_buf_hbd(xd)) {
2929         switch (xd->bd) {
2930           case 10:
2931             horz_var = cpi->fn_ptr[horz_4_bs].vf(
2932                 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2933                 0, &sse);
2934             vert_var = cpi->fn_ptr[vert_4_bs].vf(
2935                 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_10),
2936                 0, &sse);
2937             break;
2938           case 12:
2939             horz_var = cpi->fn_ptr[horz_4_bs].vf(
2940                 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2941                 0, &sse);
2942             vert_var = cpi->fn_ptr[vert_4_bs].vf(
2943                 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_12),
2944                 0, &sse);
2945             break;
2946           case 8:
2947           default:
2948             horz_var = cpi->fn_ptr[horz_4_bs].vf(
2949                 horz_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2950                 0, &sse);
2951             vert_var = cpi->fn_ptr[vert_4_bs].vf(
2952                 vert_src, src_stride, CONVERT_TO_BYTEPTR(AV1_HIGH_VAR_OFFS_8),
2953                 0, &sse);
2954             break;
2955         }
2956         horz_4_source_var[i] =
2957             ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2958         vert_4_source_var[i] =
2959             ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2960       } else {
2961         horz_var = cpi->fn_ptr[horz_4_bs].vf(horz_src, src_stride, AV1_VAR_OFFS,
2962                                              0, &sse);
2963         vert_var = cpi->fn_ptr[vert_4_bs].vf(vert_src, src_stride, AV1_VAR_OFFS,
2964                                              0, &sse);
2965         horz_4_source_var[i] =
2966             ROUND_POWER_OF_TWO(horz_var, num_pels_log2_lookup[horz_4_bs]);
2967         vert_4_source_var[i] =
2968             ROUND_POWER_OF_TWO(vert_var, num_pels_log2_lookup[vert_4_bs]);
2969       }
2970     }
2971   }
2972 
2973   const float denom = (float)(pb_source_variance + 1);
2974   const float low_b = 0.1f;
2975   const float high_b = 10.0f;
2976   for (int i = 0; i < 4; ++i) {
2977     // Ratio between the 4:1 sub-block variance and the whole-block variance.
2978     float var_ratio = (float)(horz_4_source_var[i] + 1) / denom;
2979     if (var_ratio < low_b) var_ratio = low_b;
2980     if (var_ratio > high_b) var_ratio = high_b;
2981     features[feature_index++] = var_ratio;
2982   }
2983   for (int i = 0; i < 4; ++i) {
2984     // Ratio between the 1:4 sub-block RD and the whole-block RD.
2985     float var_ratio = (float)(vert_4_source_var[i] + 1) / denom;
2986     if (var_ratio < low_b) var_ratio = low_b;
2987     if (var_ratio > high_b) var_ratio = high_b;
2988     features[feature_index++] = var_ratio;
2989   }
2990   assert(feature_index == FEATURES);
2991 
2992   // Calculate scores using the NN model.
2993   float score[LABELS] = { 0.0f };
2994   av1_nn_predict(features, nn_config, score);
2995   aom_clear_system_state();
2996   int int_score[LABELS];
2997   int max_score = -1000;
2998   for (int i = 0; i < LABELS; ++i) {
2999     int_score[i] = (int)(100 * score[i]);
3000     max_score = AOMMAX(int_score[i], max_score);
3001   }
3002 
3003   // Make decisions based on the model scores.
3004   int thresh = max_score;
3005   switch (bsize) {
3006     case BLOCK_16X16: thresh -= 500; break;
3007     case BLOCK_32X32: thresh -= 500; break;
3008     case BLOCK_64X64: thresh -= 200; break;
3009     default: break;
3010   }
3011   *partition_horz4_allowed = 0;
3012   *partition_vert4_allowed = 0;
3013   for (int i = 0; i < LABELS; ++i) {
3014     if (int_score[i] >= thresh) {
3015       if ((i >> 0) & 1) *partition_horz4_allowed = 1;
3016       if ((i >> 1) & 1) *partition_vert4_allowed = 1;
3017     }
3018   }
3019 }
3020 #undef FEATURES
3021 #undef LABELS
3022 
3023 #define FEATURES 4
3024 // ML-based partition search breakout.
ml_predict_breakout(const AV1_COMP * const cpi,BLOCK_SIZE bsize,const MACROBLOCK * const x,const RD_STATS * const rd_stats,unsigned int pb_source_variance)3025 static int ml_predict_breakout(const AV1_COMP *const cpi, BLOCK_SIZE bsize,
3026                                const MACROBLOCK *const x,
3027                                const RD_STATS *const rd_stats,
3028                                unsigned int pb_source_variance) {
3029   const NN_CONFIG *nn_config = NULL;
3030   int thresh = 0;
3031   switch (bsize) {
3032     case BLOCK_8X8:
3033       nn_config = &av1_partition_breakout_nnconfig_8;
3034       thresh = cpi->sf.ml_partition_search_breakout_thresh[0];
3035       break;
3036     case BLOCK_16X16:
3037       nn_config = &av1_partition_breakout_nnconfig_16;
3038       thresh = cpi->sf.ml_partition_search_breakout_thresh[1];
3039       break;
3040     case BLOCK_32X32:
3041       nn_config = &av1_partition_breakout_nnconfig_32;
3042       thresh = cpi->sf.ml_partition_search_breakout_thresh[2];
3043       break;
3044     case BLOCK_64X64:
3045       nn_config = &av1_partition_breakout_nnconfig_64;
3046       thresh = cpi->sf.ml_partition_search_breakout_thresh[3];
3047       break;
3048     case BLOCK_128X128:
3049       nn_config = &av1_partition_breakout_nnconfig_128;
3050       thresh = cpi->sf.ml_partition_search_breakout_thresh[4];
3051       break;
3052     default: assert(0 && "Unexpected bsize.");
3053   }
3054   if (!nn_config || thresh < 0) return 0;
3055 
3056   // Generate feature values.
3057   float features[FEATURES];
3058   int feature_index = 0;
3059   aom_clear_system_state();
3060 
3061   const int num_pels_log2 = num_pels_log2_lookup[bsize];
3062   float rate_f = (float)AOMMIN(rd_stats->rate, INT_MAX);
3063   rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) *
3064            rate_f;
3065   features[feature_index++] = rate_f;
3066 
3067   const float dist_f =
3068       (float)(AOMMIN(rd_stats->dist, INT_MAX) >> num_pels_log2);
3069   features[feature_index++] = dist_f;
3070 
3071   features[feature_index++] = (float)pb_source_variance;
3072 
3073   const int dc_q = (int)x->plane[0].dequant_QTX[0];
3074   features[feature_index++] = (float)(dc_q * dc_q) / 256.0f;
3075   assert(feature_index == FEATURES);
3076 
3077   // Calculate score using the NN model.
3078   float score = 0.0f;
3079   av1_nn_predict(features, nn_config, &score);
3080   aom_clear_system_state();
3081 
3082   // Make decision.
3083   return (int)(score * 100) >= thresh;
3084 }
3085 #undef FEATURES
3086 
3087 // Record the ref frames that have been selected by square partition blocks.
update_picked_ref_frames_mask(MACROBLOCK * const x,int ref_type,BLOCK_SIZE bsize,int mib_size,int mi_row,int mi_col)3088 static void update_picked_ref_frames_mask(MACROBLOCK *const x, int ref_type,
3089                                           BLOCK_SIZE bsize, int mib_size,
3090                                           int mi_row, int mi_col) {
3091   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
3092   const int sb_size_mask = mib_size - 1;
3093   const int mi_row_in_sb = mi_row & sb_size_mask;
3094   const int mi_col_in_sb = mi_col & sb_size_mask;
3095   const int mi_size = mi_size_wide[bsize];
3096   for (int i = mi_row_in_sb; i < mi_row_in_sb + mi_size; ++i) {
3097     for (int j = mi_col_in_sb; j < mi_col_in_sb + mi_size; ++j) {
3098       x->picked_ref_frames_mask[i * 32 + j] |= 1 << ref_type;
3099     }
3100   }
3101 }
3102 
3103 // TODO(jinging,jimbankoski,rbultje): properly skip partition types that are
3104 // unlikely to be selected depending on previous rate-distortion optimization
3105 // results, for encoding speed-up.
// TODO(chiyotsai@google.com): Move these ml related variables to a separate
// file to separate low level ml logic from partition logic.
3108 #define NUM_SIMPLE_MOTION_FEATURES 28
rd_pick_partition(AV1_COMP * const cpi,ThreadData * td,TileDataEnc * tile_data,TOKENEXTRA ** tp,int mi_row,int mi_col,BLOCK_SIZE bsize,BLOCK_SIZE max_sq_part,BLOCK_SIZE min_sq_part,RD_STATS * rd_cost,int64_t best_rd,PC_TREE * pc_tree,int64_t * none_rd)3109 static void rd_pick_partition(AV1_COMP *const cpi, ThreadData *td,
3110                               TileDataEnc *tile_data, TOKENEXTRA **tp,
3111                               int mi_row, int mi_col, BLOCK_SIZE bsize,
3112                               BLOCK_SIZE max_sq_part, BLOCK_SIZE min_sq_part,
3113                               RD_STATS *rd_cost, int64_t best_rd,
3114                               PC_TREE *pc_tree, int64_t *none_rd) {
3115   const AV1_COMMON *const cm = &cpi->common;
3116   const int num_planes = av1_num_planes(cm);
3117   TileInfo *const tile_info = &tile_data->tile_info;
3118   MACROBLOCK *const x = &td->mb;
3119   MACROBLOCKD *const xd = &x->e_mbd;
3120   const int mi_step = mi_size_wide[bsize] / 2;
3121   RD_SEARCH_MACROBLOCK_CONTEXT x_ctx;
3122   const TOKENEXTRA *const tp_orig = *tp;
3123   PICK_MODE_CONTEXT *ctx_none = &pc_tree->none;
3124   int tmp_partition_cost[PARTITION_TYPES];
3125   BLOCK_SIZE subsize;
3126   RD_STATS this_rdc, sum_rdc, best_rdc;
3127   const int bsize_at_least_8x8 = (bsize >= BLOCK_8X8);
3128   int do_square_split = bsize_at_least_8x8;
3129   const int pl = bsize_at_least_8x8
3130                      ? partition_plane_context(xd, mi_row, mi_col, bsize)
3131                      : 0;
3132   const int *partition_cost =
3133       pl >= 0 ? x->partition_cost[pl] : x->partition_cost[0];
3134 
3135   int do_rectangular_split = cpi->oxcf.enable_rect_partitions;
3136   int64_t cur_none_rd = 0;
3137   int64_t split_rd[4] = { 0, 0, 0, 0 };
3138   int64_t horz_rd[2] = { 0, 0 };
3139   int64_t vert_rd[2] = { 0, 0 };
3140   int prune_horz = 0;
3141   int prune_vert = 0;
3142   int terminate_partition_search = 0;
3143 
3144   int split_ctx_is_ready[2] = { 0, 0 };
3145   int horz_ctx_is_ready = 0;
3146   int vert_ctx_is_ready = 0;
3147   BLOCK_SIZE bsize2 = get_partition_subsize(bsize, PARTITION_SPLIT);
3148 
3149   if (best_rd < 0) {
3150     pc_tree->none.rdcost = INT64_MAX;
3151     pc_tree->none.skip = 0;
3152     av1_invalid_rd_stats(rd_cost);
3153     return;
3154   }
3155   if (bsize == cm->seq_params.sb_size) x->must_find_valid_partition = 0;
3156 
3157   // Override skipping rectangular partition operations for edge blocks
3158   const int has_rows = (mi_row + mi_step < cm->mi_rows);
3159   const int has_cols = (mi_col + mi_step < cm->mi_cols);
3160   const int xss = x->e_mbd.plane[1].subsampling_x;
3161   const int yss = x->e_mbd.plane[1].subsampling_y;
3162 
3163   if (none_rd) *none_rd = 0;
3164   int partition_none_allowed = has_rows && has_cols;
3165   int partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3166                                cpi->oxcf.enable_rect_partitions;
3167   int partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3168                                cpi->oxcf.enable_rect_partitions;
3169 
3170   (void)*tp_orig;
3171 
3172 #if CONFIG_COLLECT_PARTITION_STATS
3173   int partition_decisions[EXT_PARTITION_TYPES] = { 0 };
3174   int partition_attempts[EXT_PARTITION_TYPES] = { 0 };
3175   int64_t partition_times[EXT_PARTITION_TYPES] = { 0 };
3176   struct aom_usec_timer partition_timer = { 0 };
3177   int partition_timer_on = 0;
3178 #if CONFIG_COLLECT_PARTITION_STATS == 2
3179   PartitionStats *part_stats = &cpi->partition_stats;
3180 #endif
3181 #endif
3182 
3183   // Override partition costs at the edges of the frame in the same
3184   // way as in read_partition (see decodeframe.c)
3185   if (!(has_rows && has_cols)) {
3186     assert(bsize_at_least_8x8 && pl >= 0);
3187     const aom_cdf_prob *partition_cdf = cm->fc->partition_cdf[pl];
3188     for (int i = 0; i < PARTITION_TYPES; ++i) tmp_partition_cost[i] = INT_MAX;
3189     if (has_cols) {
3190       // At the bottom, the two possibilities are HORZ and SPLIT
3191       aom_cdf_prob bot_cdf[2];
3192       partition_gather_vert_alike(bot_cdf, partition_cdf, bsize);
3193       static const int bot_inv_map[2] = { PARTITION_HORZ, PARTITION_SPLIT };
3194       av1_cost_tokens_from_cdf(tmp_partition_cost, bot_cdf, bot_inv_map);
3195     } else if (has_rows) {
3196       // At the right, the two possibilities are VERT and SPLIT
3197       aom_cdf_prob rhs_cdf[2];
3198       partition_gather_horz_alike(rhs_cdf, partition_cdf, bsize);
3199       static const int rhs_inv_map[2] = { PARTITION_VERT, PARTITION_SPLIT };
3200       av1_cost_tokens_from_cdf(tmp_partition_cost, rhs_cdf, rhs_inv_map);
3201     } else {
3202       // At the bottom right, we always split
3203       tmp_partition_cost[PARTITION_SPLIT] = 0;
3204     }
3205 
3206     partition_cost = tmp_partition_cost;
3207     do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3208   }
3209 
3210 #ifndef NDEBUG
3211   // Nothing should rely on the default value of this array (which is just
3212   // leftover from encoding the previous block. Setting it to fixed pattern
3213   // when debugging.
3214   // bit 0, 1, 2 are blk_skip of each plane
3215   // bit 4, 5, 6 are initialization checking of each plane
3216   memset(x->blk_skip, 0x77, sizeof(x->blk_skip));
3217 #endif  // NDEBUG
3218 
3219   assert(mi_size_wide[bsize] == mi_size_high[bsize]);
3220 
3221   av1_init_rd_stats(&this_rdc);
3222   av1_invalid_rd_stats(&best_rdc);
3223   best_rdc.rdcost = best_rd;
3224 
3225   set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
3226 
3227   if (bsize == BLOCK_16X16 && cpi->vaq_refresh)
3228     x->mb_energy = av1_log_block_var(cpi, x, bsize);
3229 
3230   if (bsize > cpi->sf.use_square_partition_only_threshold) {
3231     partition_horz_allowed &= !has_rows;
3232     partition_vert_allowed &= !has_cols;
3233   }
3234 
3235   if (bsize > BLOCK_4X4 && x->use_cb_search_range) {
3236     int split_score = 0;
3237     int none_score = 0;
3238     const int score_valid = ml_prune_2pass_split_partition(
3239         &pc_tree->pc_tree_stats, bsize, &split_score, &none_score);
3240     if (score_valid) {
3241       {
3242         const int only_split_thresh = 300;
3243         const int no_none_thresh = 250;
3244         const int no_split_thresh = 0;
3245         if (split_score > only_split_thresh) {
3246           partition_none_allowed = 0;
3247           partition_horz_allowed = 0;
3248           partition_vert_allowed = 0;
3249         } else if (split_score > no_none_thresh) {
3250           partition_none_allowed = 0;
3251         }
3252         if (split_score < no_split_thresh) do_square_split = 0;
3253       }
3254       {
3255         const int no_split_thresh = 120;
3256         const int no_none_thresh = -120;
3257         if (none_score > no_split_thresh && partition_none_allowed)
3258           do_square_split = 0;
3259         if (none_score < no_none_thresh) partition_none_allowed = 0;
3260       }
3261     } else {
3262       if (pc_tree->cb_search_range == SPLIT_PLANE) {
3263         partition_none_allowed = 0;
3264         partition_horz_allowed = 0;
3265         partition_vert_allowed = 0;
3266       }
3267       if (pc_tree->cb_search_range == SEARCH_SAME_PLANE) do_square_split = 0;
3268       if (pc_tree->cb_search_range == NONE_PARTITION_PLANE) {
3269         do_square_split = 0;
3270         partition_horz_allowed = 0;
3271         partition_vert_allowed = 0;
3272       }
3273     }
3274 
3275     // Fall back to default values in case all partition modes are rejected.
3276     if (partition_none_allowed == 0 && do_square_split == 0 &&
3277         partition_horz_allowed == 0 && partition_vert_allowed == 0) {
3278       do_square_split = bsize_at_least_8x8;
3279       partition_none_allowed = has_rows && has_cols;
3280       partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3281                                cpi->oxcf.enable_rect_partitions;
3282       partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3283                                cpi->oxcf.enable_rect_partitions;
3284     }
3285   }
3286 
3287   xd->above_txfm_context = cm->above_txfm_context[tile_info->tile_row] + mi_col;
3288   xd->left_txfm_context =
3289       xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);
3290   save_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3291 
3292   // Use simple_motion_search to prune partitions. This must be done prior to
3293   // PARTITION_SPLIT to propagate the initial mvs to a smaller blocksize.
3294   const int try_split_only =
3295       cpi->sf.simple_motion_search_split_only && bsize >= BLOCK_8X8 &&
3296       do_square_split && mi_row + mi_size_high[bsize] <= cm->mi_rows &&
3297       mi_col + mi_size_wide[bsize] <= cm->mi_cols && !frame_is_intra_only(cm) &&
3298       !av1_superres_scaled(cm);
3299 
3300   if (try_split_only) {
3301     av1_simple_motion_search_based_split(
3302         cpi, x, mi_row, mi_col, bsize, &partition_none_allowed,
3303         &partition_horz_allowed, &partition_vert_allowed, &do_rectangular_split,
3304         &do_square_split);
3305   }
3306 
3307   const int try_prune_rect =
3308       cpi->sf.simple_motion_search_prune_rect && !frame_is_intra_only(cm) &&
3309       do_rectangular_split &&
3310       (do_square_split || partition_none_allowed ||
3311        (prune_horz && prune_vert)) &&
3312       (partition_horz_allowed || partition_vert_allowed) && bsize >= BLOCK_8X8;
3313 
3314   float simple_motion_features[NUM_SIMPLE_MOTION_FEATURES] = { 0.0f };
3315   int simple_motion_features_are_valid = 0;
3316 
3317   if (try_prune_rect) {
3318     av1_simple_motion_search_prune_part(
3319         cpi, x, pc_tree, mi_row, mi_col, bsize, &partition_none_allowed,
3320         &partition_horz_allowed, &partition_vert_allowed, &do_square_split,
3321         &do_rectangular_split, &prune_horz, &prune_vert, simple_motion_features,
3322         &simple_motion_features_are_valid);
3323   }
3324 
3325   // Max and min square partition levels are defined as the partition nodes that
3326   // the recursive function rd_pick_partition() can reach. To implement this:
3327   // only PARTITION_NONE is allowed if the current node equals min_sq_part,
3328   // only PARTITION_SPLIT is allowed if the current node exceeds max_sq_part.
3329   assert(block_size_wide[min_sq_part] == block_size_high[min_sq_part]);
3330   assert(block_size_wide[max_sq_part] == block_size_high[max_sq_part]);
3331   assert(min_sq_part <= max_sq_part);
3332   assert(block_size_wide[bsize] == block_size_high[bsize]);
3333   const int max_partition_size = block_size_wide[max_sq_part];
3334   const int min_partition_size = block_size_wide[min_sq_part];
3335   const int blksize = block_size_wide[bsize];
3336   assert(min_partition_size <= max_partition_size);
3337   const int is_le_min_sq_part = blksize <= min_partition_size;
3338   const int is_gt_max_sq_part = blksize > max_partition_size;
3339   if (is_gt_max_sq_part) {
3340     // If current block size is larger than max, only allow split.
3341     partition_none_allowed = 0;
3342     partition_horz_allowed = 0;
3343     partition_vert_allowed = 0;
3344     do_square_split = 1;
3345   } else if (is_le_min_sq_part) {
3346     // If current block size is less or equal to min, only allow none if valid
3347     // block large enough; only allow split otherwise.
3348     partition_horz_allowed = 0;
3349     partition_vert_allowed = 0;
3350     // only disable square split when current block is not at the picture
3351     // boundary. otherwise, inherit the square split flag from previous logic
3352     if (has_rows && has_cols) do_square_split = 0;
3353     partition_none_allowed = !do_square_split;
3354   }
3355   do_square_split &= partition_cost[PARTITION_SPLIT] != INT_MAX;
3356 
3357 BEGIN_PARTITION_SEARCH:
3358   if (x->must_find_valid_partition) {
3359     do_square_split =
3360         bsize_at_least_8x8 && partition_cost[PARTITION_SPLIT] != INT_MAX;
3361     partition_none_allowed = has_rows && has_cols;
3362     partition_horz_allowed = has_cols && yss <= xss && bsize_at_least_8x8 &&
3363                              cpi->oxcf.enable_rect_partitions;
3364     partition_vert_allowed = has_rows && xss <= yss && bsize_at_least_8x8 &&
3365                              cpi->oxcf.enable_rect_partitions;
3366     terminate_partition_search = 0;
3367   }
3368 
3369   // Partition block source pixel variance.
3370   unsigned int pb_source_variance = UINT_MAX;
3371 
3372   // Partition block sse after simple motion compensation, not in use now,
3373   // but will be used for upcoming speed features
3374   unsigned int pb_simple_motion_pred_sse = UINT_MAX;
3375   (void)pb_simple_motion_pred_sse;
3376 
3377 #if CONFIG_DIST_8X8
3378   if (x->using_dist_8x8) {
3379     if (block_size_high[bsize] <= 8) partition_horz_allowed = 0;
3380     if (block_size_wide[bsize] <= 8) partition_vert_allowed = 0;
3381     if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8)
3382       do_square_split = 0;
3383   }
3384 #endif
3385 
3386   // PARTITION_NONE
3387   if (is_le_min_sq_part && has_rows && has_cols) partition_none_allowed = 1;
3388   if (!terminate_partition_search && partition_none_allowed &&
3389       !is_gt_max_sq_part) {
3390     int pt_cost = 0;
3391     if (bsize_at_least_8x8) {
3392       pt_cost = partition_cost[PARTITION_NONE] < INT_MAX
3393                     ? partition_cost[PARTITION_NONE]
3394                     : 0;
3395     }
3396     const int64_t partition_rd_cost = RDCOST(x->rdmult, pt_cost, 0);
3397     const int64_t best_remain_rdcost =
3398         (best_rdc.rdcost == INT64_MAX) ? INT64_MAX
3399                                        : (best_rdc.rdcost - partition_rd_cost);
3400 #if CONFIG_COLLECT_PARTITION_STATS
3401     if (best_remain_rdcost >= 0) {
3402       partition_attempts[PARTITION_NONE] += 1;
3403       aom_usec_timer_start(&partition_timer);
3404       partition_timer_on = 1;
3405     }
3406 #endif
3407     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_NONE,
3408                   bsize, ctx_none, best_remain_rdcost, 0);
3409 #if CONFIG_COLLECT_PARTITION_STATS
3410     if (partition_timer_on) {
3411       aom_usec_timer_mark(&partition_timer);
3412       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3413       partition_times[PARTITION_NONE] += time;
3414       partition_timer_on = 0;
3415     }
3416 #endif
3417     pb_source_variance = x->source_variance;
3418     pb_simple_motion_pred_sse = x->simple_motion_pred_sse;
3419     if (none_rd) *none_rd = this_rdc.rdcost;
3420     cur_none_rd = this_rdc.rdcost;
3421     if (this_rdc.rate != INT_MAX) {
3422       if (cpi->sf.prune_ref_frame_for_rect_partitions) {
3423         const int ref_type = av1_ref_frame_type(ctx_none->mic.ref_frame);
3424         update_picked_ref_frames_mask(x, ref_type, bsize,
3425                                       cm->seq_params.mib_size, mi_row, mi_col);
3426       }
3427       if (bsize_at_least_8x8) {
3428         this_rdc.rate += pt_cost;
3429         this_rdc.rdcost = RDCOST(x->rdmult, this_rdc.rate, this_rdc.dist);
3430       }
3431 
3432       if (this_rdc.rdcost < best_rdc.rdcost) {
3433         // Adjust dist breakout threshold according to the partition size.
3434         const int64_t dist_breakout_thr =
3435             cpi->sf.partition_search_breakout_dist_thr >>
3436             ((2 * (MAX_SB_SIZE_LOG2 - 2)) -
3437              (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]));
3438         const int rate_breakout_thr =
3439             cpi->sf.partition_search_breakout_rate_thr *
3440             num_pels_log2_lookup[bsize];
3441 
3442         best_rdc = this_rdc;
3443         if (bsize_at_least_8x8) pc_tree->partitioning = PARTITION_NONE;
3444 
3445         if ((do_square_split || do_rectangular_split) &&
3446             !x->e_mbd.lossless[xd->mi[0]->segment_id] && ctx_none->skippable) {
3447           const int use_ml_based_breakout =
3448               bsize <= cpi->sf.use_square_partition_only_threshold &&
3449               bsize > BLOCK_4X4 && xd->bd == 8;
3450           if (use_ml_based_breakout) {
3451             if (ml_predict_breakout(cpi, bsize, x, &this_rdc,
3452                                     pb_source_variance)) {
3453               do_square_split = 0;
3454               do_rectangular_split = 0;
3455             }
3456           }
3457 
3458           // If all y, u, v transform blocks in this partition are skippable,
3459           // and the dist & rate are within the thresholds, the partition
3460           // search is terminated for current branch of the partition search
3461           // tree. The dist & rate thresholds are set to 0 at speed 0 to
3462           // disable the early termination at that speed.
3463           if (best_rdc.dist < dist_breakout_thr &&
3464               best_rdc.rate < rate_breakout_thr) {
3465             do_square_split = 0;
3466             do_rectangular_split = 0;
3467           }
3468         }
3469 
3470         if (cpi->sf.simple_motion_search_early_term_none && cm->show_frame &&
3471             !frame_is_intra_only(cm) && bsize >= BLOCK_16X16 &&
3472             mi_row + mi_step < cm->mi_rows && mi_col + mi_step < cm->mi_cols &&
3473             this_rdc.rdcost < INT64_MAX && this_rdc.rdcost >= 0 &&
3474             this_rdc.rate < INT_MAX && this_rdc.rate >= 0 &&
3475             (do_square_split || do_rectangular_split)) {
3476           av1_simple_motion_search_early_term_none(
3477               cpi, x, pc_tree, mi_row, mi_col, bsize, &this_rdc,
3478               &terminate_partition_search, simple_motion_features,
3479               &simple_motion_features_are_valid);
3480         }
3481       }
3482     }
3483 
3484     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3485   }
3486 
3487   // store estimated motion vector
3488   if (cpi->sf.adaptive_motion_search) store_pred_mv(x, ctx_none);
3489 
3490   // PARTITION_SPLIT
3491   if ((!terminate_partition_search && do_square_split) || is_gt_max_sq_part) {
3492     av1_init_rd_stats(&sum_rdc);
3493     subsize = get_partition_subsize(bsize, PARTITION_SPLIT);
3494     sum_rdc.rate = partition_cost[PARTITION_SPLIT];
3495     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3496 
3497     int idx;
3498 #if CONFIG_COLLECT_PARTITION_STATS
3499     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
3500       partition_attempts[PARTITION_SPLIT] += 1;
3501       aom_usec_timer_start(&partition_timer);
3502       partition_timer_on = 1;
3503     }
3504 #endif
3505     for (idx = 0; idx < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++idx) {
3506       const int x_idx = (idx & 1) * mi_step;
3507       const int y_idx = (idx >> 1) * mi_step;
3508 
3509       if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
3510         continue;
3511 
3512       if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3513 
3514       pc_tree->split[idx]->index = idx;
3515       int64_t *p_split_rd = &split_rd[idx];
3516       const int64_t best_remain_rdcost =
3517           best_rdc.rdcost == INT64_MAX ? INT64_MAX
3518                                        : (best_rdc.rdcost - sum_rdc.rdcost);
3519       rd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx,
3520                         subsize, max_sq_part, min_sq_part, &this_rdc,
3521                         best_remain_rdcost, pc_tree->split[idx], p_split_rd);
3522 
3523       if (this_rdc.rate == INT_MAX) {
3524         sum_rdc.rdcost = INT64_MAX;
3525         break;
3526       } else {
3527         sum_rdc.rate += this_rdc.rate;
3528         sum_rdc.dist += this_rdc.dist;
3529         sum_rdc.rdcost += this_rdc.rdcost;
3530         if (idx <= 1 && (bsize <= BLOCK_8X8 ||
3531                          pc_tree->split[idx]->partitioning == PARTITION_NONE)) {
3532           const MB_MODE_INFO *const mbmi = &pc_tree->split[idx]->none.mic;
3533           const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3534           // Neither palette mode nor cfl predicted
3535           if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3536             if (mbmi->uv_mode != UV_CFL_PRED) split_ctx_is_ready[idx] = 1;
3537           }
3538         }
3539       }
3540     }
3541 #if CONFIG_COLLECT_PARTITION_STATS
3542     if (partition_timer_on) {
3543       aom_usec_timer_mark(&partition_timer);
3544       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3545       partition_times[PARTITION_SPLIT] += time;
3546       partition_timer_on = 0;
3547     }
3548 #endif
3549     const int reached_last_index = (idx == 4);
3550 
3551     if (reached_last_index && sum_rdc.rdcost < best_rdc.rdcost) {
3552       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3553 
3554       if (sum_rdc.rdcost < best_rdc.rdcost) {
3555         best_rdc = sum_rdc;
3556         pc_tree->partitioning = PARTITION_SPLIT;
3557       }
3558     } else if (cpi->sf.less_rectangular_check_level > 0) {
3559       // skip rectangular partition test when larger block size
3560       // gives better rd cost
3561       if (cpi->sf.less_rectangular_check_level == 2 || idx <= 2)
3562         do_rectangular_split &= !partition_none_allowed;
3563     }
3564 
3565     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3566   }  // if (do_split)
3567 
3568   if (cpi->sf.ml_prune_rect_partition && !frame_is_intra_only(cm) &&
3569       (partition_horz_allowed || partition_vert_allowed) &&
3570       !(prune_horz || prune_vert)) {
3571     av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3572     ml_prune_rect_partition(cpi, x, bsize, best_rdc.rdcost, cur_none_rd,
3573                             split_rd, &prune_horz, &prune_vert);
3574   }
3575 
3576   // PARTITION_HORZ
3577   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz_allowed));
3578   if (!terminate_partition_search && partition_horz_allowed && !prune_horz &&
3579       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
3580       !is_gt_max_sq_part) {
3581     av1_init_rd_stats(&sum_rdc);
3582     subsize = get_partition_subsize(bsize, PARTITION_HORZ);
3583     if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3584     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3585         partition_none_allowed) {
3586       pc_tree->horizontal[0].pred_interp_filter =
3587           av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3588     }
3589     sum_rdc.rate = partition_cost[PARTITION_HORZ];
3590     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3591     const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
3592                                            ? INT64_MAX
3593                                            : (best_rdc.rdcost - sum_rdc.rdcost);
3594 #if CONFIG_COLLECT_PARTITION_STATS
3595     if (best_remain_rdcost >= 0) {
3596       partition_attempts[PARTITION_HORZ] += 1;
3597       aom_usec_timer_start(&partition_timer);
3598       partition_timer_on = 1;
3599     }
3600 #endif
3601     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_HORZ,
3602                   subsize, &pc_tree->horizontal[0], best_remain_rdcost, 0);
3603 
3604     if (this_rdc.rate == INT_MAX) {
3605       sum_rdc.rdcost = INT64_MAX;
3606     } else {
3607       sum_rdc.rate += this_rdc.rate;
3608       sum_rdc.dist += this_rdc.dist;
3609       sum_rdc.rdcost += this_rdc.rdcost;
3610     }
3611     horz_rd[0] = this_rdc.rdcost;
3612 
3613     if (sum_rdc.rdcost < best_rdc.rdcost && has_rows) {
3614       const PICK_MODE_CONTEXT *const ctx_h = &pc_tree->horizontal[0];
3615       const MB_MODE_INFO *const mbmi = &pc_tree->horizontal[0].mic;
3616       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3617       // Neither palette mode nor cfl predicted
3618       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3619         if (mbmi->uv_mode != UV_CFL_PRED) horz_ctx_is_ready = 1;
3620       }
3621       update_state(cpi, tile_data, td, ctx_h, mi_row, mi_col, subsize, 1);
3622       encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
3623                         subsize, NULL);
3624 
3625       if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_h);
3626 
3627       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3628           partition_none_allowed) {
3629         pc_tree->horizontal[1].pred_interp_filter =
3630             av1_extract_interp_filter(ctx_h->mic.interp_filters, 0);
3631       }
3632       pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc,
3633                     PARTITION_HORZ, subsize, &pc_tree->horizontal[1],
3634                     best_rdc.rdcost - sum_rdc.rdcost, 0);
3635       horz_rd[1] = this_rdc.rdcost;
3636 
3637       if (this_rdc.rate == INT_MAX) {
3638         sum_rdc.rdcost = INT64_MAX;
3639       } else {
3640         sum_rdc.rate += this_rdc.rate;
3641         sum_rdc.dist += this_rdc.dist;
3642         sum_rdc.rdcost += this_rdc.rdcost;
3643       }
3644     }
3645 #if CONFIG_COLLECT_PARTITION_STATS
3646     if (partition_timer_on) {
3647       aom_usec_timer_mark(&partition_timer);
3648       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3649       partition_times[PARTITION_HORZ] += time;
3650       partition_timer_on = 0;
3651     }
3652 #endif
3653 
3654     if (sum_rdc.rdcost < best_rdc.rdcost) {
3655       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3656       if (sum_rdc.rdcost < best_rdc.rdcost) {
3657         best_rdc = sum_rdc;
3658         pc_tree->partitioning = PARTITION_HORZ;
3659       }
3660     }
3661 
3662     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3663   }
3664 
3665   // PARTITION_VERT
3666   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert_allowed));
3667   if (!terminate_partition_search && partition_vert_allowed && !prune_vert &&
3668       (do_rectangular_split || active_v_edge(cpi, mi_col, mi_step)) &&
3669       !is_gt_max_sq_part) {
3670     av1_init_rd_stats(&sum_rdc);
3671     subsize = get_partition_subsize(bsize, PARTITION_VERT);
3672 
3673     if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3674 
3675     if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3676         partition_none_allowed) {
3677       pc_tree->vertical[0].pred_interp_filter =
3678           av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3679     }
3680     sum_rdc.rate = partition_cost[PARTITION_VERT];
3681     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
3682     const int64_t best_remain_rdcost = best_rdc.rdcost == INT64_MAX
3683                                            ? INT64_MAX
3684                                            : (best_rdc.rdcost - sum_rdc.rdcost);
3685 #if CONFIG_COLLECT_PARTITION_STATS
3686     if (best_remain_rdcost >= 0) {
3687       partition_attempts[PARTITION_VERT] += 1;
3688       aom_usec_timer_start(&partition_timer);
3689       partition_timer_on = 1;
3690     }
3691 #endif
3692     pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, PARTITION_VERT,
3693                   subsize, &pc_tree->vertical[0], best_remain_rdcost, 0);
3694 
3695     if (this_rdc.rate == INT_MAX) {
3696       sum_rdc.rdcost = INT64_MAX;
3697     } else {
3698       sum_rdc.rate += this_rdc.rate;
3699       sum_rdc.dist += this_rdc.dist;
3700       sum_rdc.rdcost += this_rdc.rdcost;
3701     }
3702     vert_rd[0] = this_rdc.rdcost;
3703     if (sum_rdc.rdcost < best_rdc.rdcost && has_cols) {
3704       const MB_MODE_INFO *const mbmi = &pc_tree->vertical[0].mic;
3705       const PALETTE_MODE_INFO *const pmi = &mbmi->palette_mode_info;
3706       // Neither palette mode nor cfl predicted
3707       if (pmi->palette_size[0] == 0 && pmi->palette_size[1] == 0) {
3708         if (mbmi->uv_mode != UV_CFL_PRED) vert_ctx_is_ready = 1;
3709       }
3710       update_state(cpi, tile_data, td, &pc_tree->vertical[0], mi_row, mi_col,
3711                    subsize, 1);
3712       encode_superblock(cpi, tile_data, td, tp, DRY_RUN_NORMAL, mi_row, mi_col,
3713                         subsize, NULL);
3714 
3715       if (cpi->sf.adaptive_motion_search) load_pred_mv(x, ctx_none);
3716 
3717       if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
3718           partition_none_allowed) {
3719         pc_tree->vertical[1].pred_interp_filter =
3720             av1_extract_interp_filter(ctx_none->mic.interp_filters, 0);
3721       }
3722       pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc,
3723                     PARTITION_VERT, subsize, &pc_tree->vertical[1],
3724                     best_rdc.rdcost - sum_rdc.rdcost, 0);
3725       vert_rd[1] = this_rdc.rdcost;
3726 
3727       if (this_rdc.rate == INT_MAX) {
3728         sum_rdc.rdcost = INT64_MAX;
3729       } else {
3730         sum_rdc.rate += this_rdc.rate;
3731         sum_rdc.dist += this_rdc.dist;
3732         sum_rdc.rdcost += this_rdc.rdcost;
3733       }
3734     }
3735 #if CONFIG_COLLECT_PARTITION_STATS
3736     if (partition_timer_on) {
3737       aom_usec_timer_mark(&partition_timer);
3738       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3739       partition_times[PARTITION_VERT] += time;
3740       partition_timer_on = 0;
3741     }
3742 #endif
3743 
3744     if (sum_rdc.rdcost < best_rdc.rdcost) {
3745       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
3746       if (sum_rdc.rdcost < best_rdc.rdcost) {
3747         best_rdc = sum_rdc;
3748         pc_tree->partitioning = PARTITION_VERT;
3749       }
3750     }
3751 
3752     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3753   }
3754 
3755   if (pb_source_variance == UINT_MAX) {
3756     av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, bsize);
3757     if (is_cur_buf_hbd(xd)) {
3758       pb_source_variance = av1_high_get_sby_perpixel_variance(
3759           cpi, &x->plane[0].src, bsize, xd->bd);
3760     } else {
3761       pb_source_variance =
3762           av1_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
3763     }
3764   }
3765 
3766   if (use_pb_simple_motion_pred_sse(cpi) &&
3767       pb_simple_motion_pred_sse == UINT_MAX) {
3768     const MV ref_mv_full = { .row = 0, .col = 0 };
3769     unsigned int var = 0;
3770 
3771     av1_simple_motion_sse_var(cpi, x, mi_row, mi_col, bsize, ref_mv_full, 0,
3772                               &pb_simple_motion_pred_sse, &var);
3773   }
3774 
3775   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !do_rectangular_split));
3776 
3777   const int ext_partition_allowed =
3778       do_rectangular_split && bsize > BLOCK_8X8 && partition_none_allowed;
3779 
3780   // The standard AB partitions are allowed whenever ext-partition-types are
3781   // allowed
3782   int horzab_partition_allowed =
3783       ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3784   int vertab_partition_allowed =
3785       ext_partition_allowed & cpi->oxcf.enable_ab_partitions;
3786 
3787 #if CONFIG_DIST_8X8
3788   if (x->using_dist_8x8) {
3789     if (block_size_high[bsize] <= 8 || block_size_wide[bsize] <= 8) {
3790       horzab_partition_allowed = 0;
3791       vertab_partition_allowed = 0;
3792     }
3793   }
3794 #endif
3795 
3796   if (cpi->sf.prune_ext_partition_types_search_level) {
3797     if (cpi->sf.prune_ext_partition_types_search_level == 1) {
3798       // TODO(debargha,huisu@google.com): may need to tune the threshold for
3799       // pb_source_variance.
3800       horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3801                                    (pc_tree->partitioning == PARTITION_NONE &&
3802                                     pb_source_variance < 32) ||
3803                                    pc_tree->partitioning == PARTITION_SPLIT);
3804       vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3805                                    (pc_tree->partitioning == PARTITION_NONE &&
3806                                     pb_source_variance < 32) ||
3807                                    pc_tree->partitioning == PARTITION_SPLIT);
3808     } else {
3809       horzab_partition_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
3810                                    pc_tree->partitioning == PARTITION_SPLIT);
3811       vertab_partition_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
3812                                    pc_tree->partitioning == PARTITION_SPLIT);
3813     }
3814     horz_rd[0] = (horz_rd[0] < INT64_MAX ? horz_rd[0] : 0);
3815     horz_rd[1] = (horz_rd[1] < INT64_MAX ? horz_rd[1] : 0);
3816     vert_rd[0] = (vert_rd[0] < INT64_MAX ? vert_rd[0] : 0);
3817     vert_rd[1] = (vert_rd[1] < INT64_MAX ? vert_rd[1] : 0);
3818     split_rd[0] = (split_rd[0] < INT64_MAX ? split_rd[0] : 0);
3819     split_rd[1] = (split_rd[1] < INT64_MAX ? split_rd[1] : 0);
3820     split_rd[2] = (split_rd[2] < INT64_MAX ? split_rd[2] : 0);
3821     split_rd[3] = (split_rd[3] < INT64_MAX ? split_rd[3] : 0);
3822   }
3823   int horza_partition_allowed = horzab_partition_allowed;
3824   int horzb_partition_allowed = horzab_partition_allowed;
3825   if (cpi->sf.prune_ext_partition_types_search_level) {
3826     const int64_t horz_a_rd = horz_rd[1] + split_rd[0] + split_rd[1];
3827     const int64_t horz_b_rd = horz_rd[0] + split_rd[2] + split_rd[3];
3828     switch (cpi->sf.prune_ext_partition_types_search_level) {
3829       case 1:
3830         horza_partition_allowed &= (horz_a_rd / 16 * 14 < best_rdc.rdcost);
3831         horzb_partition_allowed &= (horz_b_rd / 16 * 14 < best_rdc.rdcost);
3832         break;
3833       case 2:
3834       default:
3835         horza_partition_allowed &= (horz_a_rd / 16 * 15 < best_rdc.rdcost);
3836         horzb_partition_allowed &= (horz_b_rd / 16 * 15 < best_rdc.rdcost);
3837         break;
3838     }
3839   }
3840 
3841   int verta_partition_allowed = vertab_partition_allowed;
3842   int vertb_partition_allowed = vertab_partition_allowed;
3843   if (cpi->sf.prune_ext_partition_types_search_level) {
3844     const int64_t vert_a_rd = vert_rd[1] + split_rd[0] + split_rd[2];
3845     const int64_t vert_b_rd = vert_rd[0] + split_rd[1] + split_rd[3];
3846     switch (cpi->sf.prune_ext_partition_types_search_level) {
3847       case 1:
3848         verta_partition_allowed &= (vert_a_rd / 16 * 14 < best_rdc.rdcost);
3849         vertb_partition_allowed &= (vert_b_rd / 16 * 14 < best_rdc.rdcost);
3850         break;
3851       case 2:
3852       default:
3853         verta_partition_allowed &= (vert_a_rd / 16 * 15 < best_rdc.rdcost);
3854         vertb_partition_allowed &= (vert_b_rd / 16 * 15 < best_rdc.rdcost);
3855         break;
3856     }
3857   }
3858 
3859   if (cpi->sf.ml_prune_ab_partition && ext_partition_allowed &&
3860       partition_horz_allowed && partition_vert_allowed) {
3861     // TODO(huisu@google.com): x->source_variance may not be the current
3862     // block's variance. The correct one to use is pb_source_variance. Need to
3863     // re-train the model to fix it.
3864     ml_prune_ab_partition(bsize, pc_tree->partitioning,
3865                           get_unsigned_bits(x->source_variance),
3866                           best_rdc.rdcost, horz_rd, vert_rd, split_rd,
3867                           &horza_partition_allowed, &horzb_partition_allowed,
3868                           &verta_partition_allowed, &vertb_partition_allowed);
3869   }
3870 
3871   horza_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3872   horzb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3873   verta_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3874   vertb_partition_allowed &= cpi->oxcf.enable_ab_partitions;
3875 
3876   // PARTITION_HORZ_A
3877   if (!terminate_partition_search && partition_horz_allowed &&
3878       horza_partition_allowed && !is_gt_max_sq_part) {
3879     subsize = get_partition_subsize(bsize, PARTITION_HORZ_A);
3880     pc_tree->horizontala[0].rd_mode_is_ready = 0;
3881     pc_tree->horizontala[1].rd_mode_is_ready = 0;
3882     pc_tree->horizontala[2].rd_mode_is_ready = 0;
3883     if (split_ctx_is_ready[0]) {
3884       av1_copy_tree_context(&pc_tree->horizontala[0], &pc_tree->split[0]->none);
3885       pc_tree->horizontala[0].mic.partition = PARTITION_HORZ_A;
3886       pc_tree->horizontala[0].rd_mode_is_ready = 1;
3887       if (split_ctx_is_ready[1]) {
3888         av1_copy_tree_context(&pc_tree->horizontala[1],
3889                               &pc_tree->split[1]->none);
3890         pc_tree->horizontala[1].mic.partition = PARTITION_HORZ_A;
3891         pc_tree->horizontala[1].rd_mode_is_ready = 1;
3892       }
3893     }
3894 #if CONFIG_COLLECT_PARTITION_STATS
3895     {
3896       RD_STATS tmp_sum_rdc;
3897       av1_init_rd_stats(&tmp_sum_rdc);
3898       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_A];
3899       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3900       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3901         partition_attempts[PARTITION_HORZ_A] += 1;
3902         aom_usec_timer_start(&partition_timer);
3903         partition_timer_on = 1;
3904       }
3905     }
3906 #endif
3907     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3908                        pc_tree->horizontala, ctx_none, mi_row, mi_col, bsize,
3909                        PARTITION_HORZ_A, mi_row, mi_col, bsize2, mi_row,
3910                        mi_col + mi_step, bsize2, mi_row + mi_step, mi_col,
3911                        subsize);
3912 #if CONFIG_COLLECT_PARTITION_STATS
3913     if (partition_timer_on) {
3914       aom_usec_timer_mark(&partition_timer);
3915       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3916       partition_times[PARTITION_HORZ_A] += time;
3917       partition_timer_on = 0;
3918     }
3919 #endif
3920     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3921   }
3922   // PARTITION_HORZ_B
3923   if (!terminate_partition_search && partition_horz_allowed &&
3924       horzb_partition_allowed && !is_gt_max_sq_part) {
3925     subsize = get_partition_subsize(bsize, PARTITION_HORZ_B);
3926     pc_tree->horizontalb[0].rd_mode_is_ready = 0;
3927     pc_tree->horizontalb[1].rd_mode_is_ready = 0;
3928     pc_tree->horizontalb[2].rd_mode_is_ready = 0;
3929     if (horz_ctx_is_ready) {
3930       av1_copy_tree_context(&pc_tree->horizontalb[0], &pc_tree->horizontal[0]);
3931       pc_tree->horizontalb[0].mic.partition = PARTITION_HORZ_B;
3932       pc_tree->horizontalb[0].rd_mode_is_ready = 1;
3933     }
3934 #if CONFIG_COLLECT_PARTITION_STATS
3935     {
3936       RD_STATS tmp_sum_rdc;
3937       av1_init_rd_stats(&tmp_sum_rdc);
3938       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_HORZ_B];
3939       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3940       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3941         partition_attempts[PARTITION_HORZ_B] += 1;
3942         aom_usec_timer_start(&partition_timer);
3943         partition_timer_on = 1;
3944       }
3945     }
3946 #endif
3947     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3948                        pc_tree->horizontalb, ctx_none, mi_row, mi_col, bsize,
3949                        PARTITION_HORZ_B, mi_row, mi_col, subsize,
3950                        mi_row + mi_step, mi_col, bsize2, mi_row + mi_step,
3951                        mi_col + mi_step, bsize2);
3952 
3953 #if CONFIG_COLLECT_PARTITION_STATS
3954     if (partition_timer_on) {
3955       aom_usec_timer_mark(&partition_timer);
3956       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3957       partition_times[PARTITION_HORZ_B] += time;
3958       partition_timer_on = 0;
3959     }
3960 #endif
3961     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
3962   }
3963 
3964   // PARTITION_VERT_A
3965   if (!terminate_partition_search && partition_vert_allowed &&
3966       verta_partition_allowed && !is_gt_max_sq_part) {
3967     subsize = get_partition_subsize(bsize, PARTITION_VERT_A);
3968     pc_tree->verticala[0].rd_mode_is_ready = 0;
3969     pc_tree->verticala[1].rd_mode_is_ready = 0;
3970     pc_tree->verticala[2].rd_mode_is_ready = 0;
3971     if (split_ctx_is_ready[0]) {
3972       av1_copy_tree_context(&pc_tree->verticala[0], &pc_tree->split[0]->none);
3973       pc_tree->verticala[0].mic.partition = PARTITION_VERT_A;
3974       pc_tree->verticala[0].rd_mode_is_ready = 1;
3975     }
3976 #if CONFIG_COLLECT_PARTITION_STATS
3977     {
3978       RD_STATS tmp_sum_rdc;
3979       av1_init_rd_stats(&tmp_sum_rdc);
3980       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_A];
3981       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
3982       if (best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
3983         partition_attempts[PARTITION_VERT_A] += 1;
3984         aom_usec_timer_start(&partition_timer);
3985         partition_timer_on = 1;
3986       }
3987     }
3988 #endif
3989     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
3990                        pc_tree->verticala, ctx_none, mi_row, mi_col, bsize,
3991                        PARTITION_VERT_A, mi_row, mi_col, bsize2,
3992                        mi_row + mi_step, mi_col, bsize2, mi_row,
3993                        mi_col + mi_step, subsize);
3994 #if CONFIG_COLLECT_PARTITION_STATS
3995     if (partition_timer_on) {
3996       aom_usec_timer_mark(&partition_timer);
3997       int64_t time = aom_usec_timer_elapsed(&partition_timer);
3998       partition_times[PARTITION_VERT_A] += time;
3999       partition_timer_on = 0;
4000     }
4001 #endif
4002     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4003   }
4004   // PARTITION_VERT_B
4005   if (!terminate_partition_search && partition_vert_allowed &&
4006       vertb_partition_allowed && !is_gt_max_sq_part) {
4007     subsize = get_partition_subsize(bsize, PARTITION_VERT_B);
4008     pc_tree->verticalb[0].rd_mode_is_ready = 0;
4009     pc_tree->verticalb[1].rd_mode_is_ready = 0;
4010     pc_tree->verticalb[2].rd_mode_is_ready = 0;
4011     if (vert_ctx_is_ready) {
4012       av1_copy_tree_context(&pc_tree->verticalb[0], &pc_tree->vertical[0]);
4013       pc_tree->verticalb[0].mic.partition = PARTITION_VERT_B;
4014       pc_tree->verticalb[0].rd_mode_is_ready = 1;
4015     }
4016 #if CONFIG_COLLECT_PARTITION_STATS
4017     {
4018       RD_STATS tmp_sum_rdc;
4019       av1_init_rd_stats(&tmp_sum_rdc);
4020       tmp_sum_rdc.rate = x->partition_cost[pl][PARTITION_VERT_B];
4021       tmp_sum_rdc.rdcost = RDCOST(x->rdmult, tmp_sum_rdc.rate, 0);
4022       if (!frame_is_intra_only(cm) &&
4023           best_rdc.rdcost - tmp_sum_rdc.rdcost >= 0) {
4024         partition_attempts[PARTITION_VERT_B] += 1;
4025         aom_usec_timer_start(&partition_timer);
4026         partition_timer_on = 1;
4027       }
4028     }
4029 #endif
4030     rd_test_partition3(cpi, td, tile_data, tp, pc_tree, &best_rdc,
4031                        pc_tree->verticalb, ctx_none, mi_row, mi_col, bsize,
4032                        PARTITION_VERT_B, mi_row, mi_col, subsize, mi_row,
4033                        mi_col + mi_step, bsize2, mi_row + mi_step,
4034                        mi_col + mi_step, bsize2);
4035 #if CONFIG_COLLECT_PARTITION_STATS
4036     if (partition_timer_on) {
4037       aom_usec_timer_mark(&partition_timer);
4038       int64_t time = aom_usec_timer_elapsed(&partition_timer);
4039       partition_times[PARTITION_VERT_B] += time;
4040       partition_timer_on = 0;
4041     }
4042 #endif
4043     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4044   }
4045 
4046   // partition4_allowed is 1 if we can use a PARTITION_HORZ_4 or
4047   // PARTITION_VERT_4 for this block. This is almost the same as
4048   // ext_partition_allowed, except that we don't allow 128x32 or 32x128
4049   // blocks, so we require that bsize is not BLOCK_128X128.
4050   const int partition4_allowed = cpi->oxcf.enable_1to4_partitions &&
4051                                  ext_partition_allowed &&
4052                                  bsize != BLOCK_128X128;
4053 
4054   int partition_horz4_allowed = partition4_allowed && partition_horz_allowed;
4055   int partition_vert4_allowed = partition4_allowed && partition_vert_allowed;
4056   if (cpi->sf.prune_ext_partition_types_search_level == 2) {
4057     partition_horz4_allowed &= (pc_tree->partitioning == PARTITION_HORZ ||
4058                                 pc_tree->partitioning == PARTITION_HORZ_A ||
4059                                 pc_tree->partitioning == PARTITION_HORZ_B ||
4060                                 pc_tree->partitioning == PARTITION_SPLIT ||
4061                                 pc_tree->partitioning == PARTITION_NONE);
4062     partition_vert4_allowed &= (pc_tree->partitioning == PARTITION_VERT ||
4063                                 pc_tree->partitioning == PARTITION_VERT_A ||
4064                                 pc_tree->partitioning == PARTITION_VERT_B ||
4065                                 pc_tree->partitioning == PARTITION_SPLIT ||
4066                                 pc_tree->partitioning == PARTITION_NONE);
4067   }
4068   if (cpi->sf.ml_prune_4_partition && partition4_allowed &&
4069       partition_horz_allowed && partition_vert_allowed) {
4070     ml_prune_4_partition(cpi, x, bsize, pc_tree->partitioning, best_rdc.rdcost,
4071                          horz_rd, vert_rd, split_rd, &partition_horz4_allowed,
4072                          &partition_vert4_allowed, pb_source_variance, mi_row,
4073                          mi_col);
4074   }
4075 
4076 #if CONFIG_DIST_8X8
4077   if (x->using_dist_8x8) {
4078     if (block_size_high[bsize] <= 16 || block_size_wide[bsize] <= 16) {
4079       partition_horz4_allowed = 0;
4080       partition_vert4_allowed = 0;
4081     }
4082   }
4083 #endif
4084 
4085   if (blksize < (min_partition_size << 2)) {
4086     partition_horz4_allowed = 0;
4087     partition_vert4_allowed = 0;
4088   }
4089 
4090   // PARTITION_HORZ_4
4091   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_horz4_allowed));
4092   if (!terminate_partition_search && partition_horz4_allowed && has_rows &&
4093       (do_rectangular_split || active_h_edge(cpi, mi_row, mi_step)) &&
4094       !is_gt_max_sq_part) {
4095     av1_init_rd_stats(&sum_rdc);
4096     const int quarter_step = mi_size_high[bsize] / 4;
4097     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
4098 
4099     subsize = get_partition_subsize(bsize, PARTITION_HORZ_4);
4100     sum_rdc.rate = partition_cost[PARTITION_HORZ_4];
4101     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4102 
4103 #if CONFIG_COLLECT_PARTITION_STATS
4104     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
4105       partition_attempts[PARTITION_HORZ_4] += 1;
4106       aom_usec_timer_start(&partition_timer);
4107       partition_timer_on = 1;
4108     }
4109 #endif
4110     for (int i = 0; i < 4; ++i) {
4111       const int this_mi_row = mi_row + i * quarter_step;
4112 
4113       if (i > 0 && this_mi_row >= cm->mi_rows) break;
4114 
4115       PICK_MODE_CONTEXT *ctx_this = &pc_tree->horizontal4[i];
4116 
4117       ctx_this->rd_mode_is_ready = 0;
4118       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), this_mi_row,
4119                            mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
4120                            PARTITION_HORZ_4, ctx_prev, ctx_this))
4121         break;
4122 
4123       ctx_prev = ctx_this;
4124     }
4125 
4126     if (sum_rdc.rdcost < best_rdc.rdcost) {
4127       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4128       if (sum_rdc.rdcost < best_rdc.rdcost) {
4129         best_rdc = sum_rdc;
4130         pc_tree->partitioning = PARTITION_HORZ_4;
4131       }
4132     }
4133 
4134 #if CONFIG_COLLECT_PARTITION_STATS
4135     if (partition_timer_on) {
4136       aom_usec_timer_mark(&partition_timer);
4137       int64_t time = aom_usec_timer_elapsed(&partition_timer);
4138       partition_times[PARTITION_HORZ_4] += time;
4139       partition_timer_on = 0;
4140     }
4141 #endif
4142     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4143   }
4144 
4145   // PARTITION_VERT_4
4146   assert(IMPLIES(!cpi->oxcf.enable_rect_partitions, !partition_vert4_allowed));
4147   if (!terminate_partition_search && partition_vert4_allowed && has_cols &&
4148       (do_rectangular_split || active_v_edge(cpi, mi_row, mi_step)) &&
4149       !is_gt_max_sq_part) {
4150     av1_init_rd_stats(&sum_rdc);
4151     const int quarter_step = mi_size_wide[bsize] / 4;
4152     PICK_MODE_CONTEXT *ctx_prev = ctx_none;
4153 
4154     subsize = get_partition_subsize(bsize, PARTITION_VERT_4);
4155     sum_rdc.rate = partition_cost[PARTITION_VERT_4];
4156     sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, 0);
4157 
4158 #if CONFIG_COLLECT_PARTITION_STATS
4159     if (best_rdc.rdcost - sum_rdc.rdcost >= 0) {
4160       partition_attempts[PARTITION_VERT_4] += 1;
4161       aom_usec_timer_start(&partition_timer);
4162       partition_timer_on = 1;
4163     }
4164 #endif
4165     for (int i = 0; i < 4; ++i) {
4166       const int this_mi_col = mi_col + i * quarter_step;
4167 
4168       if (i > 0 && this_mi_col >= cm->mi_cols) break;
4169 
4170       PICK_MODE_CONTEXT *ctx_this = &pc_tree->vertical4[i];
4171 
4172       ctx_this->rd_mode_is_ready = 0;
4173       if (!rd_try_subblock(cpi, td, tile_data, tp, (i == 3), mi_row,
4174                            this_mi_col, subsize, &best_rdc, &sum_rdc, &this_rdc,
4175                            PARTITION_VERT_4, ctx_prev, ctx_this))
4176         break;
4177 
4178       ctx_prev = ctx_this;
4179     }
4180 
4181     if (sum_rdc.rdcost < best_rdc.rdcost) {
4182       sum_rdc.rdcost = RDCOST(x->rdmult, sum_rdc.rate, sum_rdc.dist);
4183       if (sum_rdc.rdcost < best_rdc.rdcost) {
4184         best_rdc = sum_rdc;
4185         pc_tree->partitioning = PARTITION_VERT_4;
4186       }
4187     }
4188 #if CONFIG_COLLECT_PARTITION_STATS
4189     if (partition_timer_on) {
4190       aom_usec_timer_mark(&partition_timer);
4191       int64_t time = aom_usec_timer_elapsed(&partition_timer);
4192       partition_times[PARTITION_VERT_4] += time;
4193       partition_timer_on = 0;
4194     }
4195 #endif
4196     restore_context(x, &x_ctx, mi_row, mi_col, bsize, num_planes);
4197   }
4198 
4199   if (bsize == cm->seq_params.sb_size && best_rdc.rate == INT_MAX) {
4200     // Did not find a valid partition, go back and search again, with less
4201     // constraint on which partition types to search.
4202     x->must_find_valid_partition = 1;
4203 #if CONFIG_COLLECT_PARTITION_STATS == 2
4204     part_stats->partition_redo += 1;
4205 #endif
4206     goto BEGIN_PARTITION_SEARCH;
4207   }
4208 
4209   // TODO(jbb): This code added so that we avoid static analysis
4210   // warning related to the fact that best_rd isn't used after this
4211   // point.  This code should be refactored so that the duplicate
4212   // checks occur in some sub function and thus are used...
4213   (void)best_rd;
4214   *rd_cost = best_rdc;
4215 
4216 #if CONFIG_COLLECT_PARTITION_STATS
4217   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) {
4218     partition_decisions[pc_tree->partitioning] += 1;
4219   }
4220 #endif
4221 
4222 #if CONFIG_COLLECT_PARTITION_STATS == 1
4223   // If CONFIG_COLLECT_PARTITION_STATS is 1, then print out the stats for each
4224   // prediction block
4225   FILE *f = fopen("data.csv", "a");
4226   fprintf(f, "%d,%d,%d,", bsize, cm->show_frame, frame_is_intra_only(cm));
4227   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4228     fprintf(f, "%d,", partition_decisions[idx]);
4229   }
4230   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4231     fprintf(f, "%d,", partition_attempts[idx]);
4232   }
4233   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4234     fprintf(f, "%ld,", partition_times[idx]);
4235   }
4236   fprintf(f, "\n");
4237   fclose(f);
4238 #endif
4239 
4240 #if CONFIG_COLLECT_PARTITION_STATS == 2
4241   // If CONFIG_COLLECTION_PARTITION_STATS is 2, then we print out the stats for
4242   // the whole clip. So we need to pass the information upstream to the encoder
4243   const int bsize_idx = av1_get_bsize_idx_for_part_stats(bsize);
4244   int *agg_attempts = part_stats->partition_attempts[bsize_idx];
4245   int *agg_decisions = part_stats->partition_decisions[bsize_idx];
4246   int64_t *agg_times = part_stats->partition_times[bsize_idx];
4247   for (int idx = 0; idx < EXT_PARTITION_TYPES; idx++) {
4248     agg_attempts[idx] += partition_attempts[idx];
4249     agg_decisions[idx] += partition_decisions[idx];
4250     agg_times[idx] += partition_times[idx];
4251   }
4252 #endif
4253 
4254   if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
4255       pc_tree->index != 3) {
4256     if (bsize == cm->seq_params.sb_size) {
4257       x->cb_offset = 0;
4258       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, OUTPUT_ENABLED, bsize,
4259                 pc_tree, NULL);
4260     } else {
4261       encode_sb(cpi, td, tile_data, tp, mi_row, mi_col, DRY_RUN_NORMAL, bsize,
4262                 pc_tree, NULL);
4263     }
4264   }
4265 
4266   if (bsize == cm->seq_params.sb_size) {
4267     assert(best_rdc.rate < INT_MAX);
4268     assert(best_rdc.dist < INT64_MAX);
4269   } else {
4270     assert(tp_orig == *tp);
4271   }
4272 }
4273 #undef NUM_SIMPLE_MOTION_FEATURES
4274 
4275 // Set all the counters as max.
init_first_partition_pass_stats_tables(AV1_COMP * cpi,FIRST_PARTITION_PASS_STATS * stats)4276 static void init_first_partition_pass_stats_tables(
4277     AV1_COMP *cpi, FIRST_PARTITION_PASS_STATS *stats) {
4278   for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4279     memset(stats[i].ref0_counts, 0xff, sizeof(stats[i].ref0_counts));
4280     memset(stats[i].ref1_counts, 0xff, sizeof(stats[i].ref1_counts));
4281     stats[i].sample_counts = INT_MAX;
4282     if (cpi->sf.use_first_partition_pass_interintra_stats)
4283       memset(stats[i].interintra_motion_mode_count, 0xff,
4284              sizeof(stats[i].interintra_motion_mode_count));
4285   }
4286 }
4287 
// Minimum number of samples a first-pass stats table must hold before the
// two_pass_partition_search feature uses it for mode pruning. In
// first_partition_search_pass(), a table with fewer samples has its counters
// refilled with 0xff so that every candidate stays available.
#define FIRST_PARTITION_PASS_MIN_SAMPLES 16
4291 
// Returns an rdmult for the block of size `bsize` at (mi_row, mi_col), adapted
// from the TPL statistics of the current GF-group frame.
//
// The intra_cost and mc_dep_cost of every mi unit of the block that lies
// inside the frame are summed. With rk = intra_cost / mc_dep_cost and
// beta = cpi->rd.r0 / rk, the candidate comes from
// av1_get_adaptive_rdmult(cpi, beta); it is then limited to
// [orig_rdmult / 2, orig_rdmult * 3 / 2] and is never allowed below 1.
//
// orig_rdmult is returned unchanged when
//   - the GF-group index is not below MAX_LAG_BUFFERS,
//   - the selected TPL frame is not valid,
//   - the frame is a shown frame, or
//   - the accumulated costs are not both positive (no usable ratio).
static int get_rdmult_delta(AV1_COMP *cpi, BLOCK_SIZE bsize, int mi_row,
                            int mi_col, int orig_rdmult) {
  // Range-check the index BEFORE it selects an entry of cpi->tpl_stats; the
  // guard used to run only after tpl_frame->is_valid had already been read.
  // NOTE(review): this assumes tpl_stats has MAX_LAG_BUFFERS entries, which
  // is what the pre-existing guard implies -- confirm against the struct.
  if (cpi->twopass.gf_group.index >= MAX_LAG_BUFFERS) return orig_rdmult;

  TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
  if (tpl_frame->is_valid == 0) return orig_rdmult;

  if (cpi->common.show_frame) return orig_rdmult;

  const TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
  const int tpl_stride = tpl_frame->stride;

  // Clip the block to the frame so that no element outside the visible mi
  // grid is ever addressed (previously the element address was formed first
  // and the out-of-frame positions were skipped afterwards).
  const int row_end =
      AOMMIN(mi_row + mi_size_high[bsize], cpi->common.mi_rows);
  const int col_end =
      AOMMIN(mi_col + mi_size_wide[bsize], cpi->common.mi_cols);

  int64_t intra_cost = 0;
  int64_t mc_dep_cost = 0;
  for (int row = mi_row; row < row_end; ++row) {
    for (int col = mi_col; col < col_end; ++col) {
      const TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
      intra_cost += this_stats->intra_cost;
      mc_dep_cost += this_stats->mc_dep_cost;
    }
  }

  // A zero cost would make rk or beta a division by zero (inf/NaN), which
  // would then be fed to av1_get_adaptive_rdmult() and converted to int.
  // Without a meaningful ratio, keep the caller's multiplier.
  if (intra_cost <= 0 || mc_dep_cost <= 0) return orig_rdmult;

  aom_clear_system_state();

  const double r0 = cpi->rd.r0;
  const double rk = (double)intra_cost / mc_dep_cost;
  const double beta = r0 / rk;
  int dr = av1_get_adaptive_rdmult(cpi, beta);

  // Keep the adapted value within +/-50% of the original multiplier.
  dr = AOMMIN(dr, orig_rdmult * 3 / 2);
  dr = AOMMAX(dr, orig_rdmult * 1 / 2);

  dr = AOMMAX(1, dr);

  return dr;
}
4337 
// Derives the quantizer index of the superblock at (mi_row, mi_col) from the
// energy of its source pixels and installs it for encoding.
//
// The energy level (wavelet energy when DELTAQ_MODULATION == 1, log block
// variance otherwise) is stored in x->sb_energy_level and converted to a
// qindex offset. The offset is added to cm->base_qindex, clamped to
// [delta_q_res, 256 - delta_q_res], and the distance from the base is rounded
// to a multiple of delta_q_res. The result goes to xd->delta_qindex and
// xd->mi[0]->current_qindex, after which the plane quantizers are
// re-initialised.
//
// When cpi->oxcf.deltaq_mode is DELTA_Q_LF, a loop filter delta derived from
// half the qindex offset is also written to every mode-info entry of the
// superblock that lies inside the frame.
static void setup_delta_q(AV1_COMP *const cpi, MACROBLOCK *const x,
                          const TileInfo *const tile_info, int mi_row,
                          int mi_col, int num_planes) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const DeltaQInfo *const dq = &cm->delta_q_info;
  const BLOCK_SIZE sb_size = cm->seq_params.sb_size;

  // Delta-q modulation is driven by the source pixels of this superblock.
  av1_setup_src_planes(x, cpi->source, mi_row, mi_col, num_planes, sb_size);

  const int energy_level =
      (DELTAQ_MODULATION == 1)
          ? av1_block_wavelet_energy_level(cpi, x, sb_size)
          : av1_log_block_var(cpi, x, sb_size);
  x->sb_energy_level = energy_level;
  const int offset_qindex =
      av1_compute_deltaq_from_energy_level(cpi, energy_level);

  const int q_res = dq->delta_q_res;
  const int base_qindex = cm->base_qindex;
  const int clamped_qindex =
      clamp(base_qindex + offset_qindex, q_res, 256 - q_res);
  // Round the distance from the base qindex to a multiple of q_res.
  const int current_qindex =
      ((clamped_qindex - base_qindex + q_res / 2) & ~(q_res - 1)) +
      base_qindex;
  assert(current_qindex > 0);

  xd->delta_qindex = current_qindex - base_qindex;
  set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
  xd->mi[0]->current_qindex = current_qindex;
  av1_init_plane_quantizers(cpi, x, xd->mi[0]->segment_id);

  if (cpi->oxcf.deltaq_mode != DELTA_Q_LF) return;

  const int lf_res = dq->delta_lf_res;
  const int delta_lf =
      clamp((offset_qindex / 2 + lf_res / 2) & ~(lf_res - 1),
            -MAX_LOOP_FILTER, MAX_LOOP_FILTER);
  const int frame_lf_count =
      av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
  const int mib_size = cm->seq_params.mib_size;
  const int sb_rows = AOMMIN(mib_size, cm->mi_rows - mi_row);
  const int sb_cols = AOMMIN(mib_size, cm->mi_cols - mi_col);

  // Pre-set the loop filter deltas. This happens before mi is assigned for
  // the individual blocks of the current superblock.
  for (int r = 0; r < sb_rows; ++r) {
    for (int c = 0; c < sb_cols; ++c) {
      const int mi_idx = (mi_row + r) * cm->mi_stride + (mi_col + c);
      cm->mi[mi_idx].delta_lf_from_base = delta_lf;
      for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id) {
        cm->mi[mi_idx].delta_lf[lf_id] = delta_lf;
      }
    }
  }
}
4397 
4398 // First pass of partition search only considers square partition block sizes.
4399 // The results will be used in the second partition search pass to prune
4400 // unlikely partition candidates.
first_partition_search_pass(AV1_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,int mi_row,int mi_col,TOKENEXTRA ** tp)4401 static void first_partition_search_pass(AV1_COMP *cpi, ThreadData *td,
4402                                         TileDataEnc *tile_data, int mi_row,
4403                                         int mi_col, TOKENEXTRA **tp) {
4404   MACROBLOCK *const x = &td->mb;
4405   x->cb_partition_scan = 1;
4406 
4407   const SPEED_FEATURES *const sf = &cpi->sf;
4408   // Reset the stats tables.
4409   av1_zero(x->first_partition_pass_stats);
4410 
4411   AV1_COMMON *const cm = &cpi->common;
4412   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4413   const int mib_size_log2 = cm->seq_params.mib_size_log2;
4414   PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
4415   RD_STATS dummy_rdc;
4416   rd_pick_sqr_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4417                         &dummy_rdc, INT64_MAX, pc_root, NULL);
4418   x->cb_partition_scan = 0;
4419 
4420   x->source_variance = UINT_MAX;
4421   x->simple_motion_pred_sse = UINT_MAX;
4422   if (sf->adaptive_pred_interp_filter) {
4423     const int leaf_nodes = 256;
4424     for (int i = 0; i < leaf_nodes; ++i) {
4425       td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4426       td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4427       td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4428       td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4429     }
4430   }
4431 
4432   x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
4433   av1_zero(x->txb_rd_record_8X8);
4434   av1_zero(x->txb_rd_record_16X16);
4435   av1_zero(x->txb_rd_record_32X32);
4436   av1_zero(x->txb_rd_record_64X64);
4437   av1_zero(x->txb_rd_record_intra);
4438   av1_zero(x->pred_mv);
4439   pc_root->index = 0;
4440 
4441   for (int idy = 0; idy < mi_size_high[sb_size]; ++idy) {
4442     for (int idx = 0; idx < mi_size_wide[sb_size]; ++idx) {
4443       const int offset = cm->mi_stride * (mi_row + idy) + (mi_col + idx);
4444       cm->mi_grid_visible[offset] = 0;
4445     }
4446   }
4447 
4448   x->use_cb_search_range = 1;
4449 
4450   for (int i = 0; i < FIRST_PARTITION_PASS_STATS_TABLES; ++i) {
4451     FIRST_PARTITION_PASS_STATS *const stat = &x->first_partition_pass_stats[i];
4452     if (stat->sample_counts < FIRST_PARTITION_PASS_MIN_SAMPLES) {
4453       // If there are not enough samples collected, make all available.
4454       memset(stat->ref0_counts, 0xff, sizeof(stat->ref0_counts));
4455       memset(stat->ref1_counts, 0xff, sizeof(stat->ref1_counts));
4456       if (cpi->sf.use_first_partition_pass_interintra_stats)
4457         memset(stat->interintra_motion_mode_count, 0xff,
4458                sizeof(stat->interintra_motion_mode_count));
4459     } else if (sf->selective_ref_frame < 3) {
4460       // ALTREF2_FRAME and BWDREF_FRAME may be skipped during the
4461       // initial partition scan, so we don't eliminate them.
4462       stat->ref0_counts[ALTREF2_FRAME] = 0xff;
4463       stat->ref1_counts[ALTREF2_FRAME] = 0xff;
4464       stat->ref0_counts[BWDREF_FRAME] = 0xff;
4465       stat->ref1_counts[BWDREF_FRAME] = 0xff;
4466       if (cpi->sf.use_first_partition_pass_interintra_stats) {
4467         stat->interintra_motion_mode_count[ALTREF2_FRAME] = 0xff;
4468         stat->interintra_motion_mode_count[BWDREF_FRAME] = 0xff;
4469       }
4470     }
4471   }
4472 }
4473 
// Weights for blending two CDF sets in avg_cdf_symbols(). encode_sb_row()
// passes AVG_CDF_WEIGHT_LEFT as the weight of the current tile context
// (xd->tile_ctx) and AVG_CDF_WEIGHT_TOP_RIGHT as the weight of the context
// saved in x->row_ctx.
#define AVG_CDF_WEIGHT_LEFT 3
#define AVG_CDF_WEIGHT_TOP_RIGHT 1
4476 
// Replaces each entry of `cdf_ptr_left` with the rounded weighted average of
// itself and the entry at the same position in `cdf_ptr_tr`:
//   left = (left * wt_left + tr * wt_tr + (wt_left + wt_tr) / 2) /
//          (wt_left + wt_tr)
// There are `num_cdfs` tables laid out `cdf_stride` entries apart; in each
// one, entries 0..nsymbs (inclusive) are averaged.
static void avg_cdf_symbol(aom_cdf_prob *cdf_ptr_left, aom_cdf_prob *cdf_ptr_tr,
                           int num_cdfs, int cdf_stride, int nsymbs,
                           int wt_left, int wt_tr) {
  const int wt_total = wt_left + wt_tr;
  const int rounding = wt_total / 2;
  for (int table = 0; table < num_cdfs; ++table) {
    const int base = table * cdf_stride;
    for (int sym = 0; sym <= nsymbs; ++sym) {
      const int pos = base + sym;
      const int weighted_sum = (int)cdf_ptr_left[pos] * wt_left +
                               (int)cdf_ptr_tr[pos] * wt_tr + rounding;
      cdf_ptr_left[pos] = (aom_cdf_prob)(weighted_sum / wt_total);
      assert(cdf_ptr_left[pos] >= 0 && cdf_ptr_left[pos] < CDF_PROB_TOP);
    }
  }
}
4492 
// Averages the CDF array `cname_tr` into the CDF array `cname_left`, where
// each CDF occupies CDF_SIZE(nsymbs) entries. Shorthand for AVG_CDF_STRIDE
// with that stride.
#define AVERAGE_CDF(cname_left, cname_tr, nsymbs) \
  AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, CDF_SIZE(nsymbs))

// Averages the CDF array `cname_tr` into `cname_left` by calling
// avg_cdf_symbol() on all CDFs the array holds; consecutive CDFs start
// `cdf_stride` entries apart and entries 0..nsymbs of each are averaged.
//
// Requirements at the point of expansion:
//  - `cname_left` must be an actual array (not a pointer): the number of CDFs
//    is computed from sizeof(cname_left).
//  - Variables named `wt_left` and `wt_tr` must be in scope; they are not
//    macro parameters but are passed through to avg_cdf_symbol().
#define AVG_CDF_STRIDE(cname_left, cname_tr, nsymbs, cdf_stride)           \
  do {                                                                     \
    aom_cdf_prob *cdf_ptr_left = (aom_cdf_prob *)cname_left;               \
    aom_cdf_prob *cdf_ptr_tr = (aom_cdf_prob *)cname_tr;                   \
    int array_size = (int)sizeof(cname_left) / sizeof(aom_cdf_prob);       \
    int num_cdfs = array_size / cdf_stride;                                \
    avg_cdf_symbol(cdf_ptr_left, cdf_ptr_tr, num_cdfs, cdf_stride, nsymbs, \
                   wt_left, wt_tr);                                        \
  } while (0)
4505 
// Averages every CDF of the motion vector context `nmv_tr` into `nmv_left`,
// using weights wt_left and wt_tr: the joints CDF first, then the CDF tables
// of both entries of comps[]. Each table is averaged independently of the
// others.
static void avg_nmv(nmv_context *nmv_left, nmv_context *nmv_tr, int wt_left,
                    int wt_tr) {
  AVERAGE_CDF(nmv_left->joints_cdf, nmv_tr->joints_cdf, 4);

  for (int comp = 0; comp < 2; ++comp) {
    // Two-symbol tables.
    AVERAGE_CDF(nmv_left->comps[comp].sign_cdf, nmv_tr->comps[comp].sign_cdf,
                2);
    AVERAGE_CDF(nmv_left->comps[comp].class0_hp_cdf,
                nmv_tr->comps[comp].class0_hp_cdf, 2);
    AVERAGE_CDF(nmv_left->comps[comp].hp_cdf, nmv_tr->comps[comp].hp_cdf, 2);
    AVERAGE_CDF(nmv_left->comps[comp].bits_cdf, nmv_tr->comps[comp].bits_cdf,
                2);

    // Tables with MV_FP_SIZE symbols.
    AVERAGE_CDF(nmv_left->comps[comp].class0_fp_cdf,
                nmv_tr->comps[comp].class0_fp_cdf, MV_FP_SIZE);
    AVERAGE_CDF(nmv_left->comps[comp].fp_cdf, nmv_tr->comps[comp].fp_cdf,
                MV_FP_SIZE);

    // Class tables.
    AVERAGE_CDF(nmv_left->comps[comp].classes_cdf,
                nmv_tr->comps[comp].classes_cdf, MV_CLASSES);
    AVERAGE_CDF(nmv_left->comps[comp].class0_cdf,
                nmv_tr->comps[comp].class0_cdf, CLASS0_SIZE);
  }
}
4524 
4525 // In case of row-based multi-threading of encoder, since we always
4526 // keep a top - right sync, we can average the top - right SB's CDFs and
4527 // the left SB's CDFs and use the same for current SB's encoding to
4528 // improve the performance. This function facilitates the averaging
4529 // of CDF and used only when row-mt is enabled in encoder.
avg_cdf_symbols(FRAME_CONTEXT * ctx_left,FRAME_CONTEXT * ctx_tr,int wt_left,int wt_tr)4530 static void avg_cdf_symbols(FRAME_CONTEXT *ctx_left, FRAME_CONTEXT *ctx_tr,
4531                             int wt_left, int wt_tr) {
4532   AVERAGE_CDF(ctx_left->txb_skip_cdf, ctx_tr->txb_skip_cdf, 2);
4533   AVERAGE_CDF(ctx_left->eob_extra_cdf, ctx_tr->eob_extra_cdf, 2);
4534   AVERAGE_CDF(ctx_left->dc_sign_cdf, ctx_tr->dc_sign_cdf, 2);
4535   AVERAGE_CDF(ctx_left->eob_flag_cdf16, ctx_tr->eob_flag_cdf16, 5);
4536   AVERAGE_CDF(ctx_left->eob_flag_cdf32, ctx_tr->eob_flag_cdf32, 6);
4537   AVERAGE_CDF(ctx_left->eob_flag_cdf64, ctx_tr->eob_flag_cdf64, 7);
4538   AVERAGE_CDF(ctx_left->eob_flag_cdf128, ctx_tr->eob_flag_cdf128, 8);
4539   AVERAGE_CDF(ctx_left->eob_flag_cdf256, ctx_tr->eob_flag_cdf256, 9);
4540   AVERAGE_CDF(ctx_left->eob_flag_cdf512, ctx_tr->eob_flag_cdf512, 10);
4541   AVERAGE_CDF(ctx_left->eob_flag_cdf1024, ctx_tr->eob_flag_cdf1024, 11);
4542   AVERAGE_CDF(ctx_left->coeff_base_eob_cdf, ctx_tr->coeff_base_eob_cdf, 3);
4543   AVERAGE_CDF(ctx_left->coeff_base_cdf, ctx_tr->coeff_base_cdf, 4);
4544   AVERAGE_CDF(ctx_left->coeff_br_cdf, ctx_tr->coeff_br_cdf, BR_CDF_SIZE);
4545   AVERAGE_CDF(ctx_left->newmv_cdf, ctx_tr->newmv_cdf, 2);
4546   AVERAGE_CDF(ctx_left->zeromv_cdf, ctx_tr->zeromv_cdf, 2);
4547   AVERAGE_CDF(ctx_left->refmv_cdf, ctx_tr->refmv_cdf, 2);
4548   AVERAGE_CDF(ctx_left->drl_cdf, ctx_tr->drl_cdf, 2);
4549   AVERAGE_CDF(ctx_left->inter_compound_mode_cdf,
4550               ctx_tr->inter_compound_mode_cdf, INTER_COMPOUND_MODES);
4551   AVERAGE_CDF(ctx_left->compound_type_cdf, ctx_tr->compound_type_cdf,
4552               MASKED_COMPOUND_TYPES);
4553   AVERAGE_CDF(ctx_left->wedge_idx_cdf, ctx_tr->wedge_idx_cdf, 16);
4554   AVERAGE_CDF(ctx_left->interintra_cdf, ctx_tr->interintra_cdf, 2);
4555   AVERAGE_CDF(ctx_left->wedge_interintra_cdf, ctx_tr->wedge_interintra_cdf, 2);
4556   AVERAGE_CDF(ctx_left->interintra_mode_cdf, ctx_tr->interintra_mode_cdf,
4557               INTERINTRA_MODES);
4558   AVERAGE_CDF(ctx_left->motion_mode_cdf, ctx_tr->motion_mode_cdf, MOTION_MODES);
4559   AVERAGE_CDF(ctx_left->obmc_cdf, ctx_tr->obmc_cdf, 2);
4560   AVERAGE_CDF(ctx_left->palette_y_size_cdf, ctx_tr->palette_y_size_cdf,
4561               PALETTE_SIZES);
4562   AVERAGE_CDF(ctx_left->palette_uv_size_cdf, ctx_tr->palette_uv_size_cdf,
4563               PALETTE_SIZES);
4564   for (int j = 0; j < PALETTE_SIZES; j++) {
4565     int nsymbs = j + PALETTE_MIN_SIZE;
4566     AVG_CDF_STRIDE(ctx_left->palette_y_color_index_cdf[j],
4567                    ctx_tr->palette_y_color_index_cdf[j], nsymbs,
4568                    CDF_SIZE(PALETTE_COLORS));
4569     AVG_CDF_STRIDE(ctx_left->palette_uv_color_index_cdf[j],
4570                    ctx_tr->palette_uv_color_index_cdf[j], nsymbs,
4571                    CDF_SIZE(PALETTE_COLORS));
4572   }
4573   AVERAGE_CDF(ctx_left->palette_y_mode_cdf, ctx_tr->palette_y_mode_cdf, 2);
4574   AVERAGE_CDF(ctx_left->palette_uv_mode_cdf, ctx_tr->palette_uv_mode_cdf, 2);
4575   AVERAGE_CDF(ctx_left->comp_inter_cdf, ctx_tr->comp_inter_cdf, 2);
4576   AVERAGE_CDF(ctx_left->single_ref_cdf, ctx_tr->single_ref_cdf, 2);
4577   AVERAGE_CDF(ctx_left->comp_ref_type_cdf, ctx_tr->comp_ref_type_cdf, 2);
4578   AVERAGE_CDF(ctx_left->uni_comp_ref_cdf, ctx_tr->uni_comp_ref_cdf, 2);
4579   AVERAGE_CDF(ctx_left->comp_ref_cdf, ctx_tr->comp_ref_cdf, 2);
4580   AVERAGE_CDF(ctx_left->comp_bwdref_cdf, ctx_tr->comp_bwdref_cdf, 2);
4581   AVERAGE_CDF(ctx_left->txfm_partition_cdf, ctx_tr->txfm_partition_cdf, 2);
4582   AVERAGE_CDF(ctx_left->compound_index_cdf, ctx_tr->compound_index_cdf, 2);
4583   AVERAGE_CDF(ctx_left->comp_group_idx_cdf, ctx_tr->comp_group_idx_cdf, 2);
4584   AVERAGE_CDF(ctx_left->skip_mode_cdfs, ctx_tr->skip_mode_cdfs, 2);
4585   AVERAGE_CDF(ctx_left->skip_cdfs, ctx_tr->skip_cdfs, 2);
4586   AVERAGE_CDF(ctx_left->intra_inter_cdf, ctx_tr->intra_inter_cdf, 2);
4587   avg_nmv(&ctx_left->nmvc, &ctx_tr->nmvc, wt_left, wt_tr);
4588   avg_nmv(&ctx_left->ndvc, &ctx_tr->ndvc, wt_left, wt_tr);
4589   AVERAGE_CDF(ctx_left->intrabc_cdf, ctx_tr->intrabc_cdf, 2);
4590   AVERAGE_CDF(ctx_left->seg.tree_cdf, ctx_tr->seg.tree_cdf, MAX_SEGMENTS);
4591   AVERAGE_CDF(ctx_left->seg.pred_cdf, ctx_tr->seg.pred_cdf, 2);
4592   AVERAGE_CDF(ctx_left->seg.spatial_pred_seg_cdf,
4593               ctx_tr->seg.spatial_pred_seg_cdf, MAX_SEGMENTS);
4594   AVERAGE_CDF(ctx_left->filter_intra_cdfs, ctx_tr->filter_intra_cdfs, 2);
4595   AVERAGE_CDF(ctx_left->filter_intra_mode_cdf, ctx_tr->filter_intra_mode_cdf,
4596               FILTER_INTRA_MODES);
4597   AVERAGE_CDF(ctx_left->switchable_restore_cdf, ctx_tr->switchable_restore_cdf,
4598               RESTORE_SWITCHABLE_TYPES);
4599   AVERAGE_CDF(ctx_left->wiener_restore_cdf, ctx_tr->wiener_restore_cdf, 2);
4600   AVERAGE_CDF(ctx_left->sgrproj_restore_cdf, ctx_tr->sgrproj_restore_cdf, 2);
4601   AVERAGE_CDF(ctx_left->y_mode_cdf, ctx_tr->y_mode_cdf, INTRA_MODES);
4602   AVG_CDF_STRIDE(ctx_left->uv_mode_cdf[0], ctx_tr->uv_mode_cdf[0],
4603                  UV_INTRA_MODES - 1, CDF_SIZE(UV_INTRA_MODES));
4604   AVERAGE_CDF(ctx_left->uv_mode_cdf[1], ctx_tr->uv_mode_cdf[1], UV_INTRA_MODES);
4605   for (int i = 0; i < PARTITION_CONTEXTS; i++) {
4606     if (i < 4) {
4607       AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 4,
4608                      CDF_SIZE(10));
4609     } else if (i < 16) {
4610       AVERAGE_CDF(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 10);
4611     } else {
4612       AVG_CDF_STRIDE(ctx_left->partition_cdf[i], ctx_tr->partition_cdf[i], 8,
4613                      CDF_SIZE(10));
4614     }
4615   }
4616   AVERAGE_CDF(ctx_left->switchable_interp_cdf, ctx_tr->switchable_interp_cdf,
4617               SWITCHABLE_FILTERS);
4618   AVERAGE_CDF(ctx_left->kf_y_cdf, ctx_tr->kf_y_cdf, INTRA_MODES);
4619   AVERAGE_CDF(ctx_left->angle_delta_cdf, ctx_tr->angle_delta_cdf,
4620               2 * MAX_ANGLE_DELTA + 1);
4621   AVG_CDF_STRIDE(ctx_left->tx_size_cdf[0], ctx_tr->tx_size_cdf[0], MAX_TX_DEPTH,
4622                  CDF_SIZE(MAX_TX_DEPTH + 1));
4623   AVERAGE_CDF(ctx_left->tx_size_cdf[1], ctx_tr->tx_size_cdf[1],
4624               MAX_TX_DEPTH + 1);
4625   AVERAGE_CDF(ctx_left->tx_size_cdf[2], ctx_tr->tx_size_cdf[2],
4626               MAX_TX_DEPTH + 1);
4627   AVERAGE_CDF(ctx_left->tx_size_cdf[3], ctx_tr->tx_size_cdf[3],
4628               MAX_TX_DEPTH + 1);
4629   AVERAGE_CDF(ctx_left->delta_q_cdf, ctx_tr->delta_q_cdf, DELTA_Q_PROBS + 1);
4630   AVERAGE_CDF(ctx_left->delta_lf_cdf, ctx_tr->delta_lf_cdf, DELTA_LF_PROBS + 1);
4631   for (int i = 0; i < FRAME_LF_COUNT; i++) {
4632     AVERAGE_CDF(ctx_left->delta_lf_multi_cdf[i], ctx_tr->delta_lf_multi_cdf[i],
4633                 DELTA_LF_PROBS + 1);
4634   }
4635   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[1], ctx_tr->intra_ext_tx_cdf[1], 7,
4636                  CDF_SIZE(TX_TYPES));
4637   AVG_CDF_STRIDE(ctx_left->intra_ext_tx_cdf[2], ctx_tr->intra_ext_tx_cdf[2], 5,
4638                  CDF_SIZE(TX_TYPES));
4639   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[1], ctx_tr->inter_ext_tx_cdf[1], 16,
4640                  CDF_SIZE(TX_TYPES));
4641   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[2], ctx_tr->inter_ext_tx_cdf[2], 12,
4642                  CDF_SIZE(TX_TYPES));
4643   AVG_CDF_STRIDE(ctx_left->inter_ext_tx_cdf[3], ctx_tr->inter_ext_tx_cdf[3], 2,
4644                  CDF_SIZE(TX_TYPES));
4645   AVERAGE_CDF(ctx_left->cfl_sign_cdf, ctx_tr->cfl_sign_cdf, CFL_JOINT_SIGNS);
4646   AVERAGE_CDF(ctx_left->cfl_alpha_cdf, ctx_tr->cfl_alpha_cdf,
4647               CFL_ALPHABET_SIZE);
4648 }
4649 
encode_sb_row(AV1_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,int mi_row,TOKENEXTRA ** tp,int use_nonrd_mode)4650 static void encode_sb_row(AV1_COMP *cpi, ThreadData *td, TileDataEnc *tile_data,
4651                           int mi_row, TOKENEXTRA **tp, int use_nonrd_mode) {
4652   AV1_COMMON *const cm = &cpi->common;
4653   const int num_planes = av1_num_planes(cm);
4654   const TileInfo *const tile_info = &tile_data->tile_info;
4655   MACROBLOCK *const x = &td->mb;
4656   MACROBLOCKD *const xd = &x->e_mbd;
4657   const SPEED_FEATURES *const sf = &cpi->sf;
4658   const int leaf_nodes = 256;
4659   const int sb_cols_in_tile = av1_get_sb_cols_in_tile(cm, tile_data->tile_info);
4660   const BLOCK_SIZE sb_size = cm->seq_params.sb_size;
4661   const int mib_size = cm->seq_params.mib_size;
4662   const int mib_size_log2 = cm->seq_params.mib_size_log2;
4663   const int sb_row = (mi_row - tile_info->mi_row_start) >> mib_size_log2;
4664 
4665 #if CONFIG_COLLECT_COMPONENT_TIMING
4666   start_timing(cpi, encode_sb_time);
4667 #endif
4668 
4669   // Initialize the left context for the new SB row
4670   av1_zero_left_context(xd);
4671 
4672   // Reset delta for every tile
4673   if (mi_row == tile_info->mi_row_start) {
4674     if (cm->delta_q_info.delta_q_present_flag)
4675       xd->current_qindex = cm->base_qindex;
4676     if (cm->delta_q_info.delta_lf_present_flag) {
4677       av1_reset_loop_filter_delta(xd, av1_num_planes(cm));
4678     }
4679   }
4680 
4681   // Code each SB in the row
4682   for (int mi_col = tile_info->mi_col_start, sb_col_in_tile = 0;
4683        mi_col < tile_info->mi_col_end; mi_col += mib_size, sb_col_in_tile++) {
4684     (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row,
4685                                    sb_col_in_tile);
4686     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4687         (tile_info->mi_row_start != mi_row)) {
4688       if ((tile_info->mi_col_start == mi_col)) {
4689         // restore frame context of 1st column sb
4690         memcpy(xd->tile_ctx, x->row_ctx, sizeof(*xd->tile_ctx));
4691       } else {
4692         int wt_left = AVG_CDF_WEIGHT_LEFT;
4693         int wt_tr = AVG_CDF_WEIGHT_TOP_RIGHT;
4694         if (tile_info->mi_col_end > (mi_col + mib_size))
4695           avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile, wt_left,
4696                           wt_tr);
4697         else
4698           avg_cdf_symbols(xd->tile_ctx, x->row_ctx + sb_col_in_tile - 1,
4699                           wt_left, wt_tr);
4700       }
4701     }
4702 
4703     switch (cpi->oxcf.coeff_cost_upd_freq) {
4704       case COST_UPD_TILE:  // Tile level
4705         if (mi_row != tile_info->mi_row_start) break;
4706         AOM_FALLTHROUGH_INTENDED;
4707       case COST_UPD_SBROW:  // SB row level in tile
4708         if (mi_col != tile_info->mi_col_start) break;
4709         AOM_FALLTHROUGH_INTENDED;
4710       case COST_UPD_SB:  // SB level
4711         av1_fill_coeff_costs(&td->mb, xd->tile_ctx, num_planes);
4712         break;
4713       default: assert(0);
4714     }
4715 
4716     switch (cpi->oxcf.mode_cost_upd_freq) {
4717       case COST_UPD_TILE:  // Tile level
4718         if (mi_row != tile_info->mi_row_start) break;
4719         AOM_FALLTHROUGH_INTENDED;
4720       case COST_UPD_SBROW:  // SB row level in tile
4721         if (mi_col != tile_info->mi_col_start) break;
4722         AOM_FALLTHROUGH_INTENDED;
4723       case COST_UPD_SB:  // SB level
4724         av1_fill_mode_rates(cm, x, xd->tile_ctx);
4725         break;
4726       default: assert(0);
4727     }
4728 
4729     if (sf->adaptive_pred_interp_filter) {
4730       for (int i = 0; i < leaf_nodes; ++i) {
4731         td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE;
4732         td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE;
4733         td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE;
4734         td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE;
4735       }
4736     }
4737 
4738     x->mb_rd_record.num = x->mb_rd_record.index_start = 0;
4739 
4740     if (!use_nonrd_mode) {
4741       av1_zero(x->txb_rd_record_8X8);
4742       av1_zero(x->txb_rd_record_16X16);
4743       av1_zero(x->txb_rd_record_32X32);
4744       av1_zero(x->txb_rd_record_64X64);
4745       av1_zero(x->txb_rd_record_intra);
4746     }
4747 
4748     av1_zero(x->picked_ref_frames_mask);
4749 
4750     av1_zero(x->pred_mv);
4751     PC_TREE *const pc_root = td->pc_root[mib_size_log2 - MIN_MIB_SIZE_LOG2];
4752     pc_root->index = 0;
4753 
4754     if ((sf->simple_motion_search_prune_rect ||
4755          sf->simple_motion_search_early_term_none ||
4756          sf->firstpass_simple_motion_search_early_term) &&
4757         !frame_is_intra_only(cm)) {
4758       init_simple_motion_search_mvs(pc_root);
4759     }
4760 
4761     const struct segmentation *const seg = &cm->seg;
4762     int seg_skip = 0;
4763     if (seg->enabled) {
4764       const uint8_t *const map =
4765           seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map;
4766       const int segment_id =
4767           map ? get_segment_id(cm, map, sb_size, mi_row, mi_col) : 0;
4768       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
4769     }
4770     xd->cur_frame_force_integer_mv = cm->cur_frame_force_integer_mv;
4771 
4772     x->sb_energy_level = 0;
4773     if (cm->delta_q_info.delta_q_present_flag)
4774       setup_delta_q(cpi, x, tile_info, mi_row, mi_col, num_planes);
4775 
4776     int dummy_rate;
4777     int64_t dummy_dist;
4778     RD_STATS dummy_rdc;
4779     const int idx_str = cm->mi_stride * mi_row + mi_col;
4780     MB_MODE_INFO **mi = cm->mi_grid_visible + idx_str;
4781     x->source_variance = UINT_MAX;
4782     x->simple_motion_pred_sse = UINT_MAX;
4783     if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
4784       set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4785       const BLOCK_SIZE bsize = seg_skip ? sb_size : sf->always_this_block_size;
4786       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4787       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4788                        &dummy_rate, &dummy_dist, 1, pc_root);
4789     } else if (cpi->partition_search_skippable_frame) {
4790       set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4791       const BLOCK_SIZE bsize =
4792           get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
4793       set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
4794       rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4795                        &dummy_rate, &dummy_dist, 1, pc_root);
4796     } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
4797                use_nonrd_mode) {
4798       set_offsets(cpi, tile_info, x, mi_row, mi_col, sb_size);
4799       av1_choose_var_based_partitioning(cpi, tile_info, x, mi_row, mi_col);
4800       nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, sb_size,
4801                           &dummy_rate, &dummy_dist, 1, pc_root);
4802 
4803     } else {
4804       const int orig_rdmult = cpi->rd.RDMULT;
4805       x->cb_rdmult = orig_rdmult;
4806       if (cpi->twopass.gf_group.index > 0 && cpi->oxcf.enable_tpl_model &&
4807           cpi->oxcf.aq_mode == NO_AQ && cpi->oxcf.deltaq_mode == 0) {
4808         const int dr =
4809             get_rdmult_delta(cpi, BLOCK_128X128, mi_row, mi_col, orig_rdmult);
4810 
4811         x->cb_rdmult = dr;
4812         x->rdmult = x->cb_rdmult;
4813       }
4814 
4815       reset_partition(pc_root, sb_size);
4816       x->use_cb_search_range = 0;
4817 #if CONFIG_COLLECT_COMPONENT_TIMING
4818       start_timing(cpi, first_partition_search_pass_time);
4819 #endif
4820       init_first_partition_pass_stats_tables(cpi,
4821                                              x->first_partition_pass_stats);
4822       // Do the first pass if we need two pass partition search
4823       if (cpi->two_pass_partition_search &&
4824           cpi->sf.use_square_partition_only_threshold > BLOCK_4X4 &&
4825           mi_row + mi_size_high[sb_size] <= cm->mi_rows &&
4826           mi_col + mi_size_wide[sb_size] <= cm->mi_cols &&
4827           cm->current_frame.frame_type != KEY_FRAME) {
4828         first_partition_search_pass(cpi, td, tile_data, mi_row, mi_col, tp);
4829       }
4830 #if CONFIG_COLLECT_COMPONENT_TIMING
4831       end_timing(cpi, first_partition_search_pass_time);
4832 #endif
4833 
4834 #if CONFIG_COLLECT_COMPONENT_TIMING
4835       start_timing(cpi, rd_pick_partition_time);
4836 #endif
4837       BLOCK_SIZE max_sq_size = BLOCK_128X128;
4838       switch (cpi->oxcf.max_partition_size) {
4839         case 4: max_sq_size = BLOCK_4X4; break;
4840         case 8: max_sq_size = BLOCK_8X8; break;
4841         case 16: max_sq_size = BLOCK_16X16; break;
4842         case 32: max_sq_size = BLOCK_32X32; break;
4843         case 64: max_sq_size = BLOCK_64X64; break;
4844         case 128: max_sq_size = BLOCK_128X128; break;
4845         default: assert(0); break;
4846       }
4847       max_sq_size = AOMMIN(max_sq_size, sb_size);
4848 
4849       BLOCK_SIZE min_sq_size = BLOCK_4X4;
4850       switch (cpi->oxcf.min_partition_size) {
4851         case 4: min_sq_size = BLOCK_4X4; break;
4852         case 8: min_sq_size = BLOCK_8X8; break;
4853         case 16: min_sq_size = BLOCK_16X16; break;
4854         case 32: min_sq_size = BLOCK_32X32; break;
4855         case 64: min_sq_size = BLOCK_64X64; break;
4856         case 128: min_sq_size = BLOCK_128X128; break;
4857         default: assert(0); break;
4858       }
4859 
4860       if (use_auto_max_partition(cpi, sb_size, mi_row, mi_col)) {
4861         float features[FEATURE_SIZE_MAX_MIN_PART_PRED] = { 0.0f };
4862 
4863         av1_get_max_min_partition_features(cpi, x, mi_row, mi_col, features);
4864         max_sq_size =
4865             AOMMIN(av1_predict_max_partition(cpi, x, features), max_sq_size);
4866       }
4867 
4868       min_sq_size = AOMMIN(min_sq_size, max_sq_size);
4869 
4870       rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, sb_size,
4871                         max_sq_size, min_sq_size, &dummy_rdc, INT64_MAX,
4872                         pc_root, NULL);
4873 #if CONFIG_COLLECT_COMPONENT_TIMING
4874       end_timing(cpi, rd_pick_partition_time);
4875 #endif
4876     }
4877     // TODO(angiebird): Let inter_mode_rd_model_estimation support multi-tile.
4878     if (cpi->sf.inter_mode_rd_model_estimation == 1 && cm->tile_cols == 1 &&
4879         cm->tile_rows == 1) {
4880       av1_inter_mode_data_fit(tile_data, x->rdmult);
4881     }
4882     if (tile_data->allow_update_cdf && (cpi->row_mt == 1) &&
4883         (tile_info->mi_row_end > (mi_row + mib_size))) {
4884       if (sb_cols_in_tile == 1)
4885         memcpy(x->row_ctx, xd->tile_ctx, sizeof(*xd->tile_ctx));
4886       else if (sb_col_in_tile >= 1)
4887         memcpy(x->row_ctx + sb_col_in_tile - 1, xd->tile_ctx,
4888                sizeof(*xd->tile_ctx));
4889     }
4890     (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row,
4891                                     sb_col_in_tile, sb_cols_in_tile);
4892   }
4893 #if CONFIG_COLLECT_COMPONENT_TIMING
4894   end_timing(cpi, encode_sb_time);
4895 #endif
4896 }
4897 
// Prepares the encoder's main-thread macroblock context (cpi->td.mb) for a new
// frame: binds the source planes of cpi->source and configures the block
// planes from the sequence-level chroma subsampling.
static void init_encode_frame_mb_context(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const mb = &cpi->td.mb;
  const int plane_count = av1_num_planes(cm);

  // Hook the source frame into the macroblock; both position arguments are 0.
  av1_setup_src_planes(mb, cpi->source, 0, 0, plane_count,
                       cm->seq_params.sb_size);

  // Per-plane setup uses the sequence's subsampling factors.
  av1_setup_block_planes(&mb->e_mbd, cm->seq_params.subsampling_x,
                         cm->seq_params.subsampling_y, plane_count);
}
4911 
get_frame_type(const AV1_COMP * cpi)4912 static MV_REFERENCE_FRAME get_frame_type(const AV1_COMP *cpi) {
4913   if (frame_is_intra_only(&cpi->common)) {
4914     return INTRA_FRAME;
4915   } else if ((cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) ||
4916              cpi->rc.is_src_frame_internal_arf) {
4917     // We will not update the golden frame with an internal overlay frame
4918     return ALTREF_FRAME;
4919   } else if (cpi->refresh_golden_frame || cpi->refresh_alt2_ref_frame ||
4920              cpi->refresh_alt_ref_frame) {
4921     return GOLDEN_FRAME;
4922   } else {
4923     return LAST_FRAME;
4924   }
4925 }
4926 
select_tx_mode(const AV1_COMP * cpi)4927 static TX_MODE select_tx_mode(const AV1_COMP *cpi) {
4928   if (cpi->common.coded_lossless) return ONLY_4X4;
4929   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
4930     return TX_MODE_LARGEST;
4931   else if (cpi->sf.tx_size_search_method == USE_FULL_RD ||
4932            cpi->sf.tx_size_search_method == USE_FAST_RD)
4933     return TX_MODE_SELECT;
4934   else
4935     return cpi->common.tx_mode;
4936 }
4937 
// (Re)allocates cpi->tile_data so that it holds one TileDataEnc per tile of
// the current tile configuration (cm->tile_rows x cm->tile_cols), records the
// capacity in cpi->allocated_tiles, and seeds every tile's thresh_freq_fact
// table with 32.
//
// Any previously allocated tile_data array is released first; its contents
// are not preserved.
void av1_alloc_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int tile_cols = cm->tile_cols;
  const int tile_rows = cm->tile_rows;
  int tile_col, tile_row;

  if (cpi->tile_data != NULL) aom_free(cpi->tile_data);
  // The old array is gone: make the bookkeeping say so *before* attempting the
  // new allocation. Otherwise, if the allocation check below does not return
  // normally, cpi->allocated_tiles would keep advertising the freed capacity
  // and a caller that only compares allocated_tiles against the tile count
  // would skip reallocation and touch an invalid tile_data pointer.
  cpi->tile_data = NULL;
  cpi->allocated_tiles = 0;

  CHECK_MEM_ERROR(
      cm, cpi->tile_data,
      aom_memalign(32, tile_cols * tile_rows * sizeof(*cpi->tile_data)));
  cpi->allocated_tiles = tile_cols * tile_rows;

  // Initialise the per-block-size, per-mode threshold factors of every tile.
  for (tile_row = 0; tile_row < tile_rows; ++tile_row)
    for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
      TileDataEnc *const tile_data =
          &cpi->tile_data[tile_row * tile_cols + tile_col];
      int i, j;
      for (i = 0; i < BLOCK_SIZES_ALL; ++i) {
        for (j = 0; j < MAX_MODES; ++j) {
          tile_data->thresh_freq_fact[i][j] = 32;
        }
      }
    }
}
4962 
// Per-frame initialisation of every tile's encoder data.
// For each tile (row-major order) this:
//   - fills in the tile geometry via av1_tile_init();
//   - assigns the tile's token buffer start (cpi->tile_tok[row][col]) and its
//     TOKENLIST array start (cpi->tplist[row][col]) by advancing from the
//     previous tile's start by the previous tile's size;
//   - sets allow_update_cdf (off for large-scale-tile or when CDF update is
//     disabled);
//   - resets the tile's entropy context to a copy of *cm->fc.
void av1_init_tile_data(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int tile_cols = cm->tile_cols;
  const int tile_rows = cm->tile_rows;

  // Running cursors. Each cursor points at the start of the previously visited
  // tile; the matching "prev_*" value is how far to advance to reach the next
  // one (zero before the first tile).
  TOKENEXTRA *tok_cursor = cpi->tile_tok[0][0];
  TOKENLIST *tplist_cursor = cpi->tplist[0][0];
  unsigned int prev_tile_tokens = 0;
  int prev_tile_sb_rows = 0;

  for (int row = 0; row < tile_rows; ++row) {
    for (int col = 0; col < tile_cols; ++col) {
      TileDataEnc *const tile_data = &cpi->tile_data[row * tile_cols + col];
      TileInfo *const tile_info = &tile_data->tile_info;
      av1_tile_init(tile_info, cm, row, col);

      // Token storage for this tile begins right after the previous tile's.
      tok_cursor += prev_tile_tokens;
      cpi->tile_tok[row][col] = tok_cursor;
      prev_tile_tokens = allocated_tokens(
          *tile_info, cm->seq_params.mib_size_log2 + MI_SIZE_LOG2, num_planes);

      // Likewise for the list of token lists (advanced by the previous tile's
      // superblock-row count).
      tplist_cursor += prev_tile_sb_rows;
      cpi->tplist[row][col] = tplist_cursor;
      prev_tile_sb_rows = av1_get_sb_rows_in_tile(cm, tile_data->tile_info);

      tile_data->allow_update_cdf =
          !cm->large_scale_tile && !cm->disable_cdf_update;
      tile_data->tctx = *cm->fc;
    }
  }
}
4995 
// Encodes the superblock row that starts at mi_row inside tile
// (tile_row, tile_col) and records the range of tokens it produced in the
// tile's TOKENLIST entry for that superblock row (start, stop and count).
void av1_encode_sb_row(AV1_COMP *cpi, ThreadData *td, int tile_row,
                       int tile_col, int mi_row) {
  AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  const int sb_size_log2 = cm->seq_params.mib_size_log2 + MI_SIZE_LOG2;
  TileDataEnc *const this_tile =
      &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;

  // Index of this superblock row relative to the top of the tile.
  const int sb_row_in_tile =
      (mi_row - tile_info->mi_row_start) >> cm->seq_params.mib_size_log2;
  TOKENLIST *const row_tokens =
      &cpi->tplist[tile_row][tile_col][sb_row_in_tile];

  TOKENEXTRA *tok = NULL;
  get_start_tok(cpi, tile_row, tile_col, mi_row, &tok, sb_size_log2,
                num_planes);
  row_tokens->start = tok;

  // encode_sb_row() advances 'tok' as it goes.
  encode_sb_row(cpi, td, this_tile, mi_row, &tok, cpi->sf.use_nonrd_pick_mode);

  row_tokens->stop = tok;
  row_tokens->count = (unsigned int)(row_tokens->stop - row_tokens->start);

  // Sanity check: the row must not have used more tokens than the budget
  // computed for one superblock row of this tile width.
  const int tile_mb_cols =
      (tile_info->mi_col_end - tile_info->mi_col_start + 2) >> 2;
  const int num_mb_rows_in_sb = ((1 << sb_size_log2) + 8) >> 4;
  assert((unsigned int)(tok - row_tokens->start) <=
         get_token_alloc(num_mb_rows_in_sb, tile_mb_cols, sb_size_log2,
                         num_planes));

  // Only referenced by the assert above; silence unused warnings otherwise.
  (void)tile_mb_cols;
  (void)num_mb_rows_in_sb;
}
5031 
// Encodes one whole tile using the given thread data: resets the per-tile
// state (inter-mode RD data, above context, motion-search counters, CfL
// context, CRC calculator) and then encodes the tile one superblock row at a
// time, from top to bottom.
void av1_encode_tile(AV1_COMP *cpi, ThreadData *td, int tile_row,
                     int tile_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  TileDataEnc *const this_tile =
      &cpi->tile_data[tile_row * cm->tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;

  av1_inter_mode_data_init(this_tile);

  av1_zero_above_context(cm, xd, tile_info->mi_col_start,
                         tile_info->mi_col_end, tile_row);
  av1_init_above_context(cm, xd, tile_row);

  // Zero the tile's motion search counters and let the macroblock update them
  // through pointers.
  this_tile->m_search_count = 0;   // Count of motion search hits.
  this_tile->ex_search_count = 0;  // Exhaustive mesh search hits.
  x->m_search_count_ptr = &this_tile->m_search_count;
  x->ex_search_count_ptr = &this_tile->ex_search_count;

  cfl_init(&xd->cfl, &cm->seq_params);

  av1_crc32c_calculator_init(&x->mb_rd_record.crc_calculator);

  // Walk the tile in steps of one superblock height (in mi units).
  int mi_row = tile_info->mi_row_start;
  while (mi_row < tile_info->mi_row_end) {
    av1_encode_sb_row(cpi, td, tile_row, tile_col, mi_row);
    mi_row += cm->seq_params.mib_size;
  }
}
5061 
// Single-threaded tile encoding loop: ensures tile data storage is large
// enough, initialises it for this frame, then encodes every tile in row-major
// order with the encoder's own thread data (cpi->td). The per-tile
// intrabc_used flag is OR-ed into cpi->intrabc_used.
static void encode_tiles(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  ThreadData *const td = &cpi->td;
  const int tile_cols = cm->tile_cols;
  const int tile_rows = cm->tile_rows;

  // Allocate (or grow) the tile data array when it is missing or too small.
  const int have_enough_tiles = cpi->tile_data != NULL &&
                                cpi->allocated_tiles >= tile_cols * tile_rows;
  if (!have_enough_tiles) av1_alloc_tile_data(cpi);

  av1_init_tile_data(cpi);

  for (int row = 0; row < tile_rows; ++row) {
    for (int col = 0; col < tile_cols; ++col) {
      TileDataEnc *const this_tile =
          &cpi->tile_data[row * cm->tile_cols + col];

      // Each tile starts with a clean intrabc flag and codes against its own
      // entropy context.
      td->intrabc_used = 0;
      td->mb.e_mbd.tile_ctx = &this_tile->tctx;
      td->mb.tile_pb_ctx = &this_tile->tctx;

      av1_encode_tile(cpi, td, row, col);

      cpi->intrabc_used |= td->intrabc_used;
    }
  }
}
5085 
5086 #define GLOBAL_TRANS_TYPES_ENC 3  // highest motion model to search
gm_get_params_cost(const WarpedMotionParams * gm,const WarpedMotionParams * ref_gm,int allow_hp)5087 static int gm_get_params_cost(const WarpedMotionParams *gm,
5088                               const WarpedMotionParams *ref_gm, int allow_hp) {
5089   int params_cost = 0;
5090   int trans_bits, trans_prec_diff;
5091   switch (gm->wmtype) {
5092     case AFFINE:
5093     case ROTZOOM:
5094       params_cost += aom_count_signed_primitive_refsubexpfin(
5095           GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5096           (ref_gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS),
5097           (gm->wmmat[2] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5098       params_cost += aom_count_signed_primitive_refsubexpfin(
5099           GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5100           (ref_gm->wmmat[3] >> GM_ALPHA_PREC_DIFF),
5101           (gm->wmmat[3] >> GM_ALPHA_PREC_DIFF));
5102       if (gm->wmtype >= AFFINE) {
5103         params_cost += aom_count_signed_primitive_refsubexpfin(
5104             GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5105             (ref_gm->wmmat[4] >> GM_ALPHA_PREC_DIFF),
5106             (gm->wmmat[4] >> GM_ALPHA_PREC_DIFF));
5107         params_cost += aom_count_signed_primitive_refsubexpfin(
5108             GM_ALPHA_MAX + 1, SUBEXPFIN_K,
5109             (ref_gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) -
5110                 (1 << GM_ALPHA_PREC_BITS),
5111             (gm->wmmat[5] >> GM_ALPHA_PREC_DIFF) - (1 << GM_ALPHA_PREC_BITS));
5112       }
5113       AOM_FALLTHROUGH_INTENDED;
5114     case TRANSLATION:
5115       trans_bits = (gm->wmtype == TRANSLATION)
5116                        ? GM_ABS_TRANS_ONLY_BITS - !allow_hp
5117                        : GM_ABS_TRANS_BITS;
5118       trans_prec_diff = (gm->wmtype == TRANSLATION)
5119                             ? GM_TRANS_ONLY_PREC_DIFF + !allow_hp
5120                             : GM_TRANS_PREC_DIFF;
5121       params_cost += aom_count_signed_primitive_refsubexpfin(
5122           (1 << trans_bits) + 1, SUBEXPFIN_K,
5123           (ref_gm->wmmat[0] >> trans_prec_diff),
5124           (gm->wmmat[0] >> trans_prec_diff));
5125       params_cost += aom_count_signed_primitive_refsubexpfin(
5126           (1 << trans_bits) + 1, SUBEXPFIN_K,
5127           (ref_gm->wmmat[1] >> trans_prec_diff),
5128           (gm->wmmat[1] >> trans_prec_diff));
5129       AOM_FALLTHROUGH_INTENDED;
5130     case IDENTITY: break;
5131     default: assert(0);
5132   }
5133   return (params_cost << AV1_PROB_COST_SHIFT);
5134 }
5135 
// Decides whether a global motion search should be run for reference 'frame'
// under the speed feature sf->gm_search_type. Returns 1 to search, 0 to skip.
//   GM_FULL_SEARCH                         - always search;
//   GM_REDUCED_REF_SEARCH_SKIP_L2_L3       - skip LAST2 and LAST3;
//   GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2  - additionally skip ALTREF2;
//   GM_DISABLE_SEARCH                      - never search.
// An unknown search type trips an assert and otherwise falls back to 1.
// num_refs_using_gm is currently unused.
static int do_gm_search_logic(SPEED_FEATURES *const sf, int num_refs_using_gm,
                              int frame) {
  (void)num_refs_using_gm;

  const int is_last2_or_last3 = (frame == LAST2_FRAME || frame == LAST3_FRAME);
  int do_search = 1;

  switch (sf->gm_search_type) {
    case GM_FULL_SEARCH: break;
    case GM_REDUCED_REF_SEARCH_SKIP_L2_L3:
      do_search = !is_last2_or_last3;
      break;
    case GM_REDUCED_REF_SEARCH_SKIP_L2_L3_ARF2:
      do_search = !is_last2_or_last3 && frame != ALTREF2_FRAME;
      break;
    case GM_DISABLE_SEARCH: do_search = 0; break;
    default: assert(0); break;
  }
  return do_search;
}
5152 
get_max_allowed_ref_frames(const AV1_COMP * cpi)5153 static int get_max_allowed_ref_frames(const AV1_COMP *cpi) {
5154   const unsigned int max_allowed_refs_for_given_speed =
5155       (cpi->sf.selective_ref_frame >= 3) ? INTER_REFS_PER_FRAME - 1
5156                                          : INTER_REFS_PER_FRAME;
5157   return AOMMIN(max_allowed_refs_for_given_speed,
5158                 cpi->oxcf.max_reference_frames);
5159 }
5160 
5161 // Enforce the number of references for each arbitrary frame based on user
5162 // options and speed.
enforce_max_ref_frames(AV1_COMP * cpi)5163 static void enforce_max_ref_frames(AV1_COMP *cpi) {
5164   MV_REFERENCE_FRAME ref_frame;
5165   int total_valid_refs = 0;
5166   for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) {
5167     if (cpi->ref_frame_flags & av1_ref_frame_flag_list[ref_frame]) {
5168       total_valid_refs++;
5169     }
5170   }
5171 
5172   const int max_allowed_refs = get_max_allowed_ref_frames(cpi);
5173 
5174   // When more than 'max_allowed_refs' are available, we reduce the number of
5175   // reference frames one at a time based on this order.
5176   const MV_REFERENCE_FRAME disable_order[] = {
5177     LAST3_FRAME,
5178     LAST2_FRAME,
5179     ALTREF2_FRAME,
5180     GOLDEN_FRAME,
5181   };
5182 
5183   for (int i = 0; i < 4 && total_valid_refs > max_allowed_refs; ++i) {
5184     const MV_REFERENCE_FRAME ref_frame_to_disable = disable_order[i];
5185 
5186     if (!(cpi->ref_frame_flags &
5187           av1_ref_frame_flag_list[ref_frame_to_disable])) {
5188       continue;
5189     }
5190 
5191     switch (ref_frame_to_disable) {
5192       case LAST3_FRAME: cpi->ref_frame_flags &= ~AOM_LAST3_FLAG; break;
5193       case LAST2_FRAME: cpi->ref_frame_flags &= ~AOM_LAST2_FLAG; break;
5194       case ALTREF2_FRAME: cpi->ref_frame_flags &= ~AOM_ALT2_FLAG; break;
5195       case GOLDEN_FRAME: cpi->ref_frame_flags &= ~AOM_GOLD_FLAG; break;
5196       default: assert(0);
5197     }
5198     --total_valid_refs;
5199   }
5200   assert(total_valid_refs <= max_allowed_refs);
5201 }
5202 
av1_refs_are_one_sided(const AV1_COMMON * cm)5203 static INLINE int av1_refs_are_one_sided(const AV1_COMMON *cm) {
5204   assert(!frame_is_intra_only(cm));
5205 
5206   int one_sided_refs = 1;
5207   for (int ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref) {
5208     const RefCntBuffer *const buf = get_ref_frame_buf(cm, ref);
5209     if (buf == NULL) continue;
5210 
5211     const int ref_order_hint = buf->order_hint;
5212     if (get_relative_dist(&cm->seq_params.order_hint_info, ref_order_hint,
5213                           (int)cm->current_frame.order_hint) > 0) {
5214       one_sided_refs = 0;  // bwd reference
5215       break;
5216     }
5217   }
5218   return one_sided_refs;
5219 }
5220 
get_skip_mode_ref_offsets(const AV1_COMMON * cm,int ref_order_hint[2])5221 static INLINE void get_skip_mode_ref_offsets(const AV1_COMMON *cm,
5222                                              int ref_order_hint[2]) {
5223   const SkipModeInfo *const skip_mode_info = &cm->current_frame.skip_mode_info;
5224   ref_order_hint[0] = ref_order_hint[1] = 0;
5225   if (!skip_mode_info->skip_mode_allowed) return;
5226 
5227   const RefCntBuffer *const buf_0 =
5228       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_0);
5229   const RefCntBuffer *const buf_1 =
5230       get_ref_frame_buf(cm, LAST_FRAME + skip_mode_info->ref_frame_idx_1);
5231   assert(buf_0 != NULL && buf_1 != NULL);
5232 
5233   ref_order_hint[0] = buf_0->order_hint;
5234   ref_order_hint[1] = buf_1->order_hint;
5235 }
5236 
// Decides whether skip mode should be enabled for the current frame.
// Calls av1_setup_skip_mode_allowed(), then returns 1 only if all of the
// following hold:
//   - the setup step marked skip mode as allowed;
//   - the distances from the current frame to the two skip-mode references
//     differ by at most 1;
//   - it is not the case that all references are one-sided while
//     lag_in_frames > 0;
//   - both skip-mode references are enabled in cpi->ref_frame_flags.
// Otherwise returns 0.
static int check_skip_mode_enabled(AV1_COMP *const cpi) {
  AV1_COMMON *const cm = &cpi->common;

  av1_setup_skip_mode_allowed(cm);
  const SkipModeInfo *const skip_info = &cm->current_frame.skip_mode_info;
  if (!skip_info->skip_mode_allowed) return 0;

  // Turn off skip mode if the temporal distances of the reference pair to the
  // current frame are different by more than 1 frame.
  // NOTE(review): only the second distance goes through abs(); presumably the
  // first reference precedes the current frame - confirm.
  int ref_hint[2];
  get_skip_mode_ref_offsets(cm, ref_hint);
  const int cur_hint = (int)cm->current_frame.order_hint;
  const int dist_to_ref0 = get_relative_dist(&cm->seq_params.order_hint_info,
                                             cur_hint, ref_hint[0]);
  const int dist_to_ref1 = abs(get_relative_dist(
      &cm->seq_params.order_hint_info, cur_hint, ref_hint[1]));
  if (abs(dist_to_ref0 - dist_to_ref1) > 1) return 0;

  // High Latency: Turn off skip mode if all refs are fwd.
  if (cpi->all_one_sided_refs && cpi->oxcf.lag_in_frames > 0) return 0;

  // Reference frame -> enable-flag lookup, indexed by reference frame id.
  static const int ref_enable_flag[REF_FRAMES] = { 0,
                                                   AOM_LAST_FLAG,
                                                   AOM_LAST2_FLAG,
                                                   AOM_LAST3_FLAG,
                                                   AOM_GOLD_FLAG,
                                                   AOM_BWD_FLAG,
                                                   AOM_ALT2_FLAG,
                                                   AOM_ALT_FLAG };
  const int first_ref = skip_info->ref_frame_idx_0 + LAST_FRAME;
  const int second_ref = skip_info->ref_frame_idx_1 + LAST_FRAME;

  // Both references of the pair must still be enabled.
  return (cpi->ref_frame_flags & ref_enable_flag[first_ref]) &&
         (cpi->ref_frame_flags & ref_enable_flag[second_ref]);
}
5275 
5276 // Function to decide if we can skip the global motion parameter computation
5277 // for a particular ref frame
skip_gm_frame(AV1_COMMON * const cm,int ref_frame)5278 static INLINE int skip_gm_frame(AV1_COMMON *const cm, int ref_frame) {
5279   if ((ref_frame == LAST3_FRAME || ref_frame == LAST2_FRAME) &&
5280       cm->global_motion[GOLDEN_FRAME].wmtype != IDENTITY) {
5281     return get_relative_dist(
5282                &cm->seq_params.order_hint_info,
5283                cm->cur_frame->ref_order_hints[ref_frame - LAST_FRAME],
5284                cm->cur_frame->ref_order_hints[GOLDEN_FRAME - LAST_FRAME]) <= 0;
5285   }
5286   return 0;
5287 }
5288 
// Sets cpi->default_interp_skip_flags for the current frame: the luma-only
// default when the stream has a single plane, the general default otherwise.
static void set_default_interp_skip_flags(AV1_COMP *cpi) {
  if (av1_num_planes(&cpi->common) == 1) {
    cpi->default_interp_skip_flags = DEFAULT_LUMA_INTERP_SKIP_FLAG;
  } else {
    cpi->default_interp_skip_flags = DEFAULT_INTERP_SKIP_FLAG;
  }
}
5295 
encode_frame_internal(AV1_COMP * cpi)5296 static void encode_frame_internal(AV1_COMP *cpi) {
5297   ThreadData *const td = &cpi->td;
5298   MACROBLOCK *const x = &td->mb;
5299   AV1_COMMON *const cm = &cpi->common;
5300   MACROBLOCKD *const xd = &x->e_mbd;
5301   RD_COUNTS *const rdc = &cpi->td.rd_counts;
5302   int i;
5303 
5304   x->min_partition_size = AOMMIN(x->min_partition_size, cm->seq_params.sb_size);
5305   x->max_partition_size = AOMMIN(x->max_partition_size, cm->seq_params.sb_size);
5306 #if CONFIG_DIST_8X8
5307   x->using_dist_8x8 = cpi->oxcf.using_dist_8x8;
5308   x->tune_metric = cpi->oxcf.tuning;
5309 #endif
5310   cm->setup_mi(cm);
5311 
5312   xd->mi = cm->mi_grid_visible;
5313   xd->mi[0] = cm->mi;
5314 
5315   av1_zero(*td->counts);
5316   av1_zero(rdc->comp_pred_diff);
5317   // Two pass partition search can be enabled/disabled for different frames.
5318   // Reset this data at frame level to avoid any incorrect usage.
5319   init_first_partition_pass_stats_tables(cpi, x->first_partition_pass_stats);
5320 
5321   // Reset the flag.
5322   cpi->intrabc_used = 0;
5323   // Need to disable intrabc when superres is selected
5324   if (av1_superres_scaled(cm)) {
5325     cm->allow_intrabc = 0;
5326   }
5327 
5328   cm->allow_intrabc &= (cpi->oxcf.enable_intrabc);
5329 
5330   if (cpi->oxcf.pass != 1 && av1_use_hash_me(cm)) {
5331     // add to hash table
5332     const int pic_width = cpi->source->y_crop_width;
5333     const int pic_height = cpi->source->y_crop_height;
5334     uint32_t *block_hash_values[2][2];
5335     int8_t *is_block_same[2][3];
5336     int k, j;
5337 
5338     for (k = 0; k < 2; k++) {
5339       for (j = 0; j < 2; j++) {
5340         CHECK_MEM_ERROR(cm, block_hash_values[k][j],
5341                         aom_malloc(sizeof(uint32_t) * pic_width * pic_height));
5342       }
5343 
5344       for (j = 0; j < 3; j++) {
5345         CHECK_MEM_ERROR(cm, is_block_same[k][j],
5346                         aom_malloc(sizeof(int8_t) * pic_width * pic_height));
5347       }
5348     }
5349 
5350     av1_hash_table_create(&cm->cur_frame->hash_table);
5351     av1_generate_block_2x2_hash_value(cpi->source, block_hash_values[0],
5352                                       is_block_same[0], &cpi->td.mb);
5353     av1_generate_block_hash_value(cpi->source, 4, block_hash_values[0],
5354                                   block_hash_values[1], is_block_same[0],
5355                                   is_block_same[1], &cpi->td.mb);
5356     av1_add_to_hash_map_by_row_with_precal_data(
5357         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5358         pic_width, pic_height, 4);
5359     av1_generate_block_hash_value(cpi->source, 8, block_hash_values[1],
5360                                   block_hash_values[0], is_block_same[1],
5361                                   is_block_same[0], &cpi->td.mb);
5362     av1_add_to_hash_map_by_row_with_precal_data(
5363         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5364         pic_width, pic_height, 8);
5365     av1_generate_block_hash_value(cpi->source, 16, block_hash_values[0],
5366                                   block_hash_values[1], is_block_same[0],
5367                                   is_block_same[1], &cpi->td.mb);
5368     av1_add_to_hash_map_by_row_with_precal_data(
5369         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5370         pic_width, pic_height, 16);
5371     av1_generate_block_hash_value(cpi->source, 32, block_hash_values[1],
5372                                   block_hash_values[0], is_block_same[1],
5373                                   is_block_same[0], &cpi->td.mb);
5374     av1_add_to_hash_map_by_row_with_precal_data(
5375         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5376         pic_width, pic_height, 32);
5377     av1_generate_block_hash_value(cpi->source, 64, block_hash_values[0],
5378                                   block_hash_values[1], is_block_same[0],
5379                                   is_block_same[1], &cpi->td.mb);
5380     av1_add_to_hash_map_by_row_with_precal_data(
5381         &cm->cur_frame->hash_table, block_hash_values[1], is_block_same[1][2],
5382         pic_width, pic_height, 64);
5383 
5384     av1_generate_block_hash_value(cpi->source, 128, block_hash_values[1],
5385                                   block_hash_values[0], is_block_same[1],
5386                                   is_block_same[0], &cpi->td.mb);
5387     av1_add_to_hash_map_by_row_with_precal_data(
5388         &cm->cur_frame->hash_table, block_hash_values[0], is_block_same[0][2],
5389         pic_width, pic_height, 128);
5390 
5391     for (k = 0; k < 2; k++) {
5392       for (j = 0; j < 2; j++) {
5393         aom_free(block_hash_values[k][j]);
5394       }
5395 
5396       for (j = 0; j < 3; j++) {
5397         aom_free(is_block_same[k][j]);
5398       }
5399     }
5400   }
5401 
5402   for (i = 0; i < MAX_SEGMENTS; ++i) {
5403     const int qindex = cm->seg.enabled
5404                            ? av1_get_qindex(&cm->seg, i, cm->base_qindex)
5405                            : cm->base_qindex;
5406     xd->lossless[i] = qindex == 0 && cm->y_dc_delta_q == 0 &&
5407                       cm->u_dc_delta_q == 0 && cm->u_ac_delta_q == 0 &&
5408                       cm->v_dc_delta_q == 0 && cm->v_ac_delta_q == 0;
5409     if (xd->lossless[i]) cpi->has_lossless_segment = 1;
5410     xd->qindex[i] = qindex;
5411     if (xd->lossless[i]) {
5412       cpi->optimize_seg_arr[i] = 0;
5413     } else {
5414       cpi->optimize_seg_arr[i] = cpi->sf.optimize_coefficients;
5415     }
5416   }
5417   cm->coded_lossless = is_coded_lossless(cm, xd);
5418   cm->all_lossless = cm->coded_lossless && !av1_superres_scaled(cm);
5419 
5420   cm->tx_mode = select_tx_mode(cpi);
5421 
5422   // Fix delta q resolution for the moment
5423   cm->delta_q_info.delta_q_res = DEFAULT_DELTA_Q_RES;
5424   // Set delta_q_present_flag before it is used for the first time
5425   cm->delta_q_info.delta_lf_res = DEFAULT_DELTA_LF_RES;
5426   cm->delta_q_info.delta_q_present_flag = cpi->oxcf.deltaq_mode != NO_DELTA_Q;
5427   cm->delta_q_info.delta_lf_present_flag = cpi->oxcf.deltaq_mode == DELTA_Q_LF;
5428   cm->delta_q_info.delta_lf_multi = DEFAULT_DELTA_LF_MULTI;
5429   // update delta_q_present_flag and delta_lf_present_flag based on
5430   // base_qindex
5431   cm->delta_q_info.delta_q_present_flag &= cm->base_qindex > 0;
5432   cm->delta_q_info.delta_lf_present_flag &= cm->base_qindex > 0;
5433 
5434   if (cpi->twopass.gf_group.index &&
5435       cpi->twopass.gf_group.index < MAX_LAG_BUFFERS &&
5436       cpi->oxcf.enable_tpl_model) {
5437     TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index];
5438     TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr;
5439 
5440     int tpl_stride = tpl_frame->stride;
5441     int64_t intra_cost_base = 0;
5442     int64_t mc_dep_cost_base = 0;
5443     int row, col;
5444 
5445     for (row = 0; row < cm->mi_rows; ++row) {
5446       for (col = 0; col < cm->mi_cols; ++col) {
5447         TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col];
5448         intra_cost_base += this_stats->intra_cost;
5449         mc_dep_cost_base += this_stats->mc_dep_cost;
5450       }
5451     }
5452 
5453     aom_clear_system_state();
5454 
5455     if (tpl_frame->is_valid)
5456       cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base;
5457   }
5458 
5459   av1_frame_init_quantizer(cpi);
5460 
5461   av1_initialize_rd_consts(cpi);
5462   av1_initialize_me_consts(cpi, x, cm->base_qindex);
5463   init_encode_frame_mb_context(cpi);
5464   set_default_interp_skip_flags(cpi);
5465   if (cm->prev_frame)
5466     cm->last_frame_seg_map = cm->prev_frame->seg_map;
5467   else
5468     cm->last_frame_seg_map = NULL;
5469   if (cm->allow_intrabc || cm->coded_lossless) {
5470     av1_set_default_ref_deltas(cm->lf.ref_deltas);
5471     av1_set_default_mode_deltas(cm->lf.mode_deltas);
5472   } else if (cm->prev_frame) {
5473     memcpy(cm->lf.ref_deltas, cm->prev_frame->ref_deltas, REF_FRAMES);
5474     memcpy(cm->lf.mode_deltas, cm->prev_frame->mode_deltas, MAX_MODE_LF_DELTAS);
5475   }
5476   memcpy(cm->cur_frame->ref_deltas, cm->lf.ref_deltas, REF_FRAMES);
5477   memcpy(cm->cur_frame->mode_deltas, cm->lf.mode_deltas, MAX_MODE_LF_DELTAS);
5478 
5479   // Special case: set prev_mi to NULL when the previous mode info
5480   // context cannot be used.
5481   cm->prev_mi = cm->allow_ref_frame_mvs ? cm->prev_mip : NULL;
5482 
5483   x->txb_split_count = 0;
5484 #if CONFIG_SPEED_STATS
5485   x->tx_search_count = 0;
5486 #endif  // CONFIG_SPEED_STATS
5487 
5488 #if CONFIG_COLLECT_COMPONENT_TIMING
5489   start_timing(cpi, av1_compute_global_motion_time);
5490 #endif
5491   av1_zero(rdc->global_motion_used);
5492   av1_zero(cpi->gmparams_cost);
5493   if (cpi->common.current_frame.frame_type == INTER_FRAME && cpi->source &&
5494       cpi->oxcf.enable_global_motion && !cpi->global_motion_search_done) {
5495     YV12_BUFFER_CONFIG *ref_buf[REF_FRAMES];
5496     int frame;
5497     double params_by_motion[RANSAC_NUM_MOTIONS * (MAX_PARAMDIM - 1)];
5498     const double *params_this_motion;
5499     int inliers_by_motion[RANSAC_NUM_MOTIONS];
5500     WarpedMotionParams tmp_wm_params;
5501     // clang-format off
5502     static const double kIdentityParams[MAX_PARAMDIM - 1] = {
5503       0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0
5504     };
5505     // clang-format on
5506     int num_refs_using_gm = 0;
5507 
5508     for (frame = ALTREF_FRAME; frame >= LAST_FRAME; --frame) {
5509       ref_buf[frame] = NULL;
5510       RefCntBuffer *buf = get_ref_frame_buf(cm, frame);
5511       if (buf != NULL) ref_buf[frame] = &buf->buf;
5512       int pframe;
5513       cm->global_motion[frame] = default_warp_params;
5514       const WarpedMotionParams *ref_params =
5515           cm->prev_frame ? &cm->prev_frame->global_motion[frame]
5516                          : &default_warp_params;
5517       // check for duplicate buffer
5518       for (pframe = ALTREF_FRAME; pframe > frame; --pframe) {
5519         if (ref_buf[frame] == ref_buf[pframe]) break;
5520       }
5521       if (pframe > frame) {
5522         memcpy(&cm->global_motion[frame], &cm->global_motion[pframe],
5523                sizeof(WarpedMotionParams));
5524       } else if (ref_buf[frame] &&
5525                  ref_buf[frame]->y_crop_width == cpi->source->y_crop_width &&
5526                  ref_buf[frame]->y_crop_height == cpi->source->y_crop_height &&
5527                  do_gm_search_logic(&cpi->sf, num_refs_using_gm, frame) &&
5528                  !(cpi->sf.selective_ref_gm && skip_gm_frame(cm, frame))) {
5529         TransformationType model;
5530         const int64_t ref_frame_error = av1_frame_error(
5531             is_cur_buf_hbd(xd), xd->bd, ref_buf[frame]->y_buffer,
5532             ref_buf[frame]->y_stride, cpi->source->y_buffer,
5533             cpi->source->y_width, cpi->source->y_height, cpi->source->y_stride);
5534 
5535         if (ref_frame_error == 0) continue;
5536 
5537         aom_clear_system_state();
5538 
5539         // TODO(sarahparker, debargha): Explore do_adaptive_gm_estimation = 1
5540         const int do_adaptive_gm_estimation = 0;
5541 
5542         const int ref_frame_dist = get_relative_dist(
5543             &cm->seq_params.order_hint_info, cm->current_frame.order_hint,
5544             cm->cur_frame->ref_order_hints[frame - LAST_FRAME]);
5545         const GlobalMotionEstimationType gm_estimation_type =
5546             cm->seq_params.order_hint_info.enable_order_hint &&
5547                     abs(ref_frame_dist) <= 2 && do_adaptive_gm_estimation
5548                 ? GLOBAL_MOTION_DISFLOW_BASED
5549                 : GLOBAL_MOTION_FEATURE_BASED;
5550         for (model = ROTZOOM; model < GLOBAL_TRANS_TYPES_ENC; ++model) {
5551           int64_t best_warp_error = INT64_MAX;
5552           // Initially set all params to identity.
5553           for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5554             memcpy(params_by_motion + (MAX_PARAMDIM - 1) * i, kIdentityParams,
5555                    (MAX_PARAMDIM - 1) * sizeof(*params_by_motion));
5556           }
5557 
5558           av1_compute_global_motion(model, cpi->source, ref_buf[frame],
5559                                     cpi->common.seq_params.bit_depth,
5560                                     gm_estimation_type, inliers_by_motion,
5561                                     params_by_motion, RANSAC_NUM_MOTIONS);
5562 
5563           for (i = 0; i < RANSAC_NUM_MOTIONS; ++i) {
5564             if (inliers_by_motion[i] == 0) continue;
5565 
5566             params_this_motion = params_by_motion + (MAX_PARAMDIM - 1) * i;
5567             av1_convert_model_to_params(params_this_motion, &tmp_wm_params);
5568 
5569             if (tmp_wm_params.wmtype != IDENTITY) {
5570               const int64_t warp_error = av1_refine_integerized_param(
5571                   &tmp_wm_params, tmp_wm_params.wmtype, is_cur_buf_hbd(xd),
5572                   xd->bd, ref_buf[frame]->y_buffer, ref_buf[frame]->y_width,
5573                   ref_buf[frame]->y_height, ref_buf[frame]->y_stride,
5574                   cpi->source->y_buffer, cpi->source->y_width,
5575                   cpi->source->y_height, cpi->source->y_stride, 5,
5576                   best_warp_error);
5577               if (warp_error < best_warp_error) {
5578                 best_warp_error = warp_error;
5579                 // Save the wm_params modified by
5580                 // av1_refine_integerized_param() rather than motion index to
5581                 // avoid rerunning refine() below.
5582                 memcpy(&(cm->global_motion[frame]), &tmp_wm_params,
5583                        sizeof(WarpedMotionParams));
5584               }
5585             }
5586           }
5587           if (cm->global_motion[frame].wmtype <= AFFINE)
5588             if (!get_shear_params(&cm->global_motion[frame]))
5589               cm->global_motion[frame] = default_warp_params;
5590 
5591           if (cm->global_motion[frame].wmtype == TRANSLATION) {
5592             cm->global_motion[frame].wmmat[0] =
5593                 convert_to_trans_prec(cm->allow_high_precision_mv,
5594                                       cm->global_motion[frame].wmmat[0]) *
5595                 GM_TRANS_ONLY_DECODE_FACTOR;
5596             cm->global_motion[frame].wmmat[1] =
5597                 convert_to_trans_prec(cm->allow_high_precision_mv,
5598                                       cm->global_motion[frame].wmmat[1]) *
5599                 GM_TRANS_ONLY_DECODE_FACTOR;
5600           }
5601 
5602           // If the best error advantage found doesn't meet the threshold for
5603           // this motion type, revert to IDENTITY.
5604           if (!av1_is_enough_erroradvantage(
5605                   (double)best_warp_error / ref_frame_error,
5606                   gm_get_params_cost(&cm->global_motion[frame], ref_params,
5607                                      cm->allow_high_precision_mv),
5608                   cpi->sf.gm_erroradv_type)) {
5609             cm->global_motion[frame] = default_warp_params;
5610           }
5611           if (cm->global_motion[frame].wmtype != IDENTITY) break;
5612         }
5613         aom_clear_system_state();
5614       }
5615       if (cm->global_motion[frame].wmtype != IDENTITY) num_refs_using_gm++;
5616       cpi->gmparams_cost[frame] =
5617           gm_get_params_cost(&cm->global_motion[frame], ref_params,
5618                              cm->allow_high_precision_mv) +
5619           cpi->gmtype_cost[cm->global_motion[frame].wmtype] -
5620           cpi->gmtype_cost[IDENTITY];
5621     }
5622     // clear disabled ref_frames
5623     for (frame = LAST_FRAME; frame <= ALTREF_FRAME; ++frame) {
5624       const int ref_disabled =
5625           !(cpi->ref_frame_flags & av1_ref_frame_flag_list[frame]);
5626       if (ref_disabled && cpi->sf.recode_loop != DISALLOW_RECODE) {
5627         cpi->gmparams_cost[frame] = 0;
5628         cm->global_motion[frame] = default_warp_params;
5629       }
5630     }
5631     cpi->global_motion_search_done = 1;
5632   }
5633   memcpy(cm->cur_frame->global_motion, cm->global_motion,
5634          REF_FRAMES * sizeof(WarpedMotionParams));
5635 #if CONFIG_COLLECT_COMPONENT_TIMING
5636   end_timing(cpi, av1_compute_global_motion_time);
5637 #endif
5638 
5639 #if CONFIG_COLLECT_COMPONENT_TIMING
5640   start_timing(cpi, av1_setup_motion_field_time);
5641 #endif
5642   av1_setup_motion_field(cm);
5643 #if CONFIG_COLLECT_COMPONENT_TIMING
5644   end_timing(cpi, av1_setup_motion_field_time);
5645 #endif
5646 
5647   cpi->all_one_sided_refs =
5648       frame_is_intra_only(cm) ? 0 : av1_refs_are_one_sided(cm);
5649 
5650   cm->current_frame.skip_mode_info.skip_mode_flag =
5651       check_skip_mode_enabled(cpi);
5652 
5653   {
5654     cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read_dummy;
5655     cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write_dummy;
5656     cpi->row_mt = 0;
5657     if (cpi->oxcf.row_mt && (cpi->oxcf.max_threads > 1)) {
5658       cpi->row_mt = 1;
5659       cpi->row_mt_sync_read_ptr = av1_row_mt_sync_read;
5660       cpi->row_mt_sync_write_ptr = av1_row_mt_sync_write;
5661       av1_encode_tiles_row_mt(cpi);
5662     } else {
5663       if (AOMMIN(cpi->oxcf.max_threads, cm->tile_cols * cm->tile_rows) > 1)
5664         av1_encode_tiles_mt(cpi);
5665       else
5666         encode_tiles(cpi);
5667     }
5668   }
5669 
5670   // If intrabc is allowed but never selected, reset the allow_intrabc flag.
5671   if (cm->allow_intrabc && !cpi->intrabc_used) cm->allow_intrabc = 0;
5672   if (cm->allow_intrabc) cm->delta_q_info.delta_lf_present_flag = 0;
5673 }
5674 
// Per-frame encode driver.
//
// Clamps the segmentation map to the last active segment id, sets up the
// frame's reference buffers and sign biases, and then runs
// encode_frame_internal(). When cpi->sf.frame_parameter_update is set, the
// frame-level reference mode, interpolation filter and motion-mode switches
// are chosen before the pass, and the prediction-type thresholds, reference
// mode, skip-mode flags and tx_mode are revised afterwards from what the pass
// actually used.
void av1_encode_frame(AV1_COMP *cpi) {
  AV1_COMMON *const cm = &cpi->common;
  CurrentFrame *const cur = &cm->current_frame;
  const int plane_count = av1_num_planes(cm);

  // Whether to use the default reduced ext-tx set instead of the potential
  // full set of 16 transforms.
  cm->reduced_tx_set_used = cpi->oxcf.reduced_tx_type_set;

  // No segment id in the map may exceed last_active_segid.
  if (cm->seg.enabled && cm->seg.update_map) {
    const int rows = cm->mi_rows;
    const int cols = cm->mi_cols;
    const int max_segid = cm->seg.last_active_segid;
    uint8_t *seg_row = cpi->segmentation_map;
    for (int r = 0; r < rows; ++r, seg_row += cols) {
      for (int c = 0; c < cols; ++c) {
        if (seg_row[c] > max_segid) seg_row[c] = (uint8_t)max_segid;
      }
    }
  }

  av1_setup_frame_buf_refs(cm);
  enforce_max_ref_frames(cpi);
  av1_setup_frame_sign_bias(cm);

#if CONFIG_MISMATCH_DEBUG
  mismatch_reset_frame(plane_count);
#else
  (void)plane_count;
#endif

  // Without frame-level parameter adaptation there is nothing to choose up
  // front and nothing to revise afterwards.
  if (!cpi->sf.frame_parameter_update) {
    encode_frame_internal(cpi);
    return;
  }

  // A single RD pass is run over the whole frame assuming compound, single
  // or hybrid prediction, according to what has worked best for this type of
  // frame in the past. The pass also estimates whether another coding mode
  // would have done better and, if so, records that for subsequent frames.
  // Transform size selection is analysed the same way.
  //
  // TODO(zoeliu): To investigate whether a frame_type other than
  // INTRA/ALTREF/GOLDEN/LAST needs to be specified separately.
  RD_OPT *const rd = &cpi->rd;
  RD_COUNTS *const rd_counts = &cpi->td.rd_counts;
  const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
  int64_t *const thresholds = rd->prediction_type_threshes[frame_type];

  // Prediction (compound, single or hybrid) mode selection.
  // NOTE: frame_type equals ALTREF_FRAME only for OVERLAY/INTNL_OVERLAY
  // frames.
  const int force_single_ref =
      (frame_type == ALTREF_FRAME) || frame_is_intra_only(cm);
  cur->reference_mode =
      force_single_ref ? SINGLE_REFERENCE : REFERENCE_MODE_SELECT;

  cm->interp_filter = cm->large_scale_tile ? EIGHTTAP_REGULAR : SWITCHABLE;
  cm->switchable_motion_mode = 1;

  rd_counts->compound_ref_used_flag = 0;
  rd_counts->skip_mode_used_flag = 0;

  encode_frame_internal(cpi);

  // Average the stored threshold with this frame's per-MB prediction diff.
  for (int mode = 0; mode < REFERENCE_MODES; ++mode) {
    thresholds[mode] =
        (thresholds[mode] + rd_counts->comp_pred_diff[mode] / cm->MBs) / 2;
  }

  // Fall back to single reference if no block (4x4 blocks included in the
  // flag) ended up using a compound reference.
  if (cur->reference_mode == REFERENCE_MODE_SELECT &&
      rd_counts->compound_ref_used_flag == 0) {
    cur->reference_mode = SINGLE_REFERENCE;
#if CONFIG_ENTROPY_STATS
    av1_zero(cpi->td.counts->comp_inter);
#endif  // CONFIG_ENTROPY_STATS
  }

  // The reference mode may just have changed, so re-evaluate skip mode.
  SkipModeInfo *const skip_info = &cur->skip_mode_info;
  if (frame_is_intra_only(cm) || cur->reference_mode == SINGLE_REFERENCE) {
    skip_info->skip_mode_allowed = 0;
    skip_info->skip_mode_flag = 0;
  }
  if (skip_info->skip_mode_flag && rd_counts->skip_mode_used_flag == 0) {
    skip_info->skip_mode_flag = 0;
  }

  // No transform block was split, so TX_MODE_SELECT can be downgraded.
  if (!cm->large_scale_tile && cm->tx_mode == TX_MODE_SELECT &&
      cpi->td.mb.txb_split_count == 0) {
    cm->tx_mode = TX_MODE_LARGEST;
  }
}
5774 
// Recursively visits the transform partition of the current block, starting
// at transform-unit position (blk_row, blk_col) with candidate size tx_size.
//
// At each node the split / no-split decision is derived by comparing tx_size
// with the size stored in mbmi->inter_tx_size[]. The decision is recorded in
// the entropy counts (CONFIG_ENTROPY_STATS only) and, when allow_update_cdf is
// non-zero, in the tile's txfm_partition CDF. The above/left transform
// contexts and mbmi->tx_size are updated at every leaf, and x->txb_split_count
// is incremented for every split. Nodes at MAX_VARTX_DEPTH are treated as
// leaves without touching counts or CDFs.
static void update_txfm_count(MACROBLOCK *x, MACROBLOCKD *xd,
                              FRAME_COUNTS *counts, TX_SIZE tx_size, int depth,
                              int blk_row, int blk_col,
                              uint8_t allow_update_cdf) {
  MB_MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE bsize = mi->sb_type;
  const int row_limit = max_block_high(xd, bsize, 0);
  const int col_limit = max_block_wide(xd, bsize, 0);
  const int ctx =
      txfm_partition_context(xd->above_txfm_context + blk_col,
                             xd->left_txfm_context + blk_row, bsize, tx_size);
  const int txb_idx = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE coded_tx_size = mi->inter_tx_size[txb_idx];

  if (blk_row >= row_limit || blk_col >= col_limit) return;
  assert(tx_size > TX_4X4);

  // Leaf: either the depth limit was reached (nothing is signalled, so no
  // statistics are gathered) or the coded size matches this node's size.
  const int at_depth_limit = (depth == MAX_VARTX_DEPTH);
  if (at_depth_limit || tx_size == coded_tx_size) {
    if (!at_depth_limit) {
#if CONFIG_ENTROPY_STATS
      ++counts->txfm_partition[ctx][0];
#endif
      if (allow_update_cdf) {
        update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 0, 2);
      }
    }
    mi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
    return;
  }

  // Split node.
  const TX_SIZE sub_txs = sub_tx_size_map[tx_size];

#if CONFIG_ENTROPY_STATS
  ++counts->txfm_partition[ctx][1];
#endif
  if (allow_update_cdf) {
    update_cdf(xd->tile_ctx->txfm_partition_cdf[ctx], 1, 2);
  }
  ++x->txb_split_count;

  // Children of size 4x4 cannot be split further; finish here.
  if (sub_txs == TX_4X4) {
    mi->inter_tx_size[txb_idx] = TX_4X4;
    mi->tx_size = TX_4X4;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, TX_4X4, tx_size);
    return;
  }

  const int step_w = tx_size_wide_unit[sub_txs];
  const int step_h = tx_size_high_unit[sub_txs];
  const int span_w = tx_size_wide_unit[tx_size];
  const int span_h = tx_size_high_unit[tx_size];
  for (int r = 0; r < span_h; r += step_h) {
    for (int c = 0; c < span_w; c += step_w) {
      update_txfm_count(x, xd, counts, sub_txs, depth + 1, blk_row + r,
                        blk_col + c, allow_update_cdf);
    }
  }
}
5840 
// Points xd's above/left transform contexts at the block located at
// (mi_row, mi_col) and runs update_txfm_count() on every max-size transform
// block that tiles plane_bsize.
static void tx_partition_count_update(const AV1_COMMON *const cm, MACROBLOCK *x,
                                      BLOCK_SIZE plane_bsize, int mi_row,
                                      int mi_col, FRAME_COUNTS *td_counts,
                                      uint8_t allow_update_cdf) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const TX_SIZE root_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int step_w = tx_size_wide_unit[root_tx_size];
  const int step_h = tx_size_high_unit[root_tx_size];
  // Block dimensions expressed in units of the smallest transform size.
  const int units_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  const int units_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];

  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  for (int r = 0; r < units_high; r += step_h) {
    for (int c = 0; c < units_wide; c += step_w) {
      update_txfm_count(x, xd, td_counts, root_tx_size, 0, r, c,
                        allow_update_cdf);
    }
  }
}
5862 
// Recursively walks the transform partition of the current block from
// (blk_row, blk_col) at size tx_size, setting mbmi->tx_size and updating the
// above/left transform contexts at each leaf. No counts or CDFs are touched.
static void set_txfm_context(MACROBLOCKD *xd, TX_SIZE tx_size, int blk_row,
                             int blk_col) {
  MB_MODE_INFO *const mi = xd->mi[0];
  const BLOCK_SIZE bsize = mi->sb_type;
  const int row_limit = max_block_high(xd, bsize, 0);
  const int col_limit = max_block_wide(xd, bsize, 0);
  const int txb_idx = av1_get_txb_size_index(bsize, blk_row, blk_col);
  const TX_SIZE coded_tx_size = mi->inter_tx_size[txb_idx];

  if (blk_row >= row_limit || blk_col >= col_limit) return;

  // Leaf: the coded transform size matches this node.
  if (tx_size == coded_tx_size) {
    mi->tx_size = tx_size;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, tx_size, tx_size);
    return;
  }

  // An 8x8 node that is split always ends in 4x4 transforms.
  if (tx_size == TX_8X8) {
    mi->inter_tx_size[txb_idx] = TX_4X4;
    mi->tx_size = TX_4X4;
    txfm_partition_update(xd->above_txfm_context + blk_col,
                          xd->left_txfm_context + blk_row, TX_4X4, tx_size);
    return;
  }

  const TX_SIZE sub_txs = sub_tx_size_map[tx_size];
  const int step_w = tx_size_wide_unit[sub_txs];
  const int step_h = tx_size_high_unit[sub_txs];
  const int span_w = tx_size_wide_unit[tx_size];
  const int span_h = tx_size_high_unit[tx_size];
  for (int r = 0; r < span_h; r += step_h) {
    const int child_row = blk_row + r;
    for (int c = 0; c < span_w; c += step_w) {
      const int child_col = blk_col + c;
      // Skip children that fall outside the limits checked above.
      if (child_row < row_limit && child_col < col_limit) {
        set_txfm_context(xd, sub_txs, child_row, child_col);
      }
    }
  }
}
5900 
// Points xd's above/left transform contexts at the block located at
// (mi_row, mi_col) and runs set_txfm_context() on every max-size transform
// block that tiles plane_bsize.
static void tx_partition_set_contexts(const AV1_COMMON *const cm,
                                      MACROBLOCKD *xd, BLOCK_SIZE plane_bsize,
                                      int mi_row, int mi_col) {
  const TX_SIZE root_tx_size = get_vartx_max_txsize(xd, plane_bsize, 0);
  const int step_w = tx_size_wide_unit[root_tx_size];
  const int step_h = tx_size_high_unit[root_tx_size];
  // Block dimensions expressed in units of the smallest transform size.
  const int units_wide = block_size_wide[plane_bsize] >> tx_size_wide_log2[0];
  const int units_high = block_size_high[plane_bsize] >> tx_size_high_log2[0];

  xd->above_txfm_context = cm->above_txfm_context[xd->tile.tile_row] + mi_col;
  xd->left_txfm_context =
      xd->left_txfm_context_buffer + (mi_row & MAX_MIB_MASK);

  for (int r = 0; r < units_high; r += step_h) {
    for (int c = 0; c < units_wide; c += step_w) {
      set_txfm_context(xd, root_tx_size, r, c);
    }
  }
}
5919 
// Encodes (or dry-run encodes) the block of size bsize at (mi_row, mi_col)
// whose mode info is xd->mi[0].
//
// Steps performed:
//  - optionally gathers first-partition-pass statistics for the block;
//  - intra blocks: encodes every plane, tokenizes or costs the palette color
//    map, and updates the txb context;
//  - inter blocks: sets up reference planes, builds the inter (and OBMC)
//    predictor, encodes the residual and tokenizes it;
//  - on a real (non dry-run) pass, updates transform-size statistics / CDFs
//    and x->txb_split_count;
//  - finally refreshes the transform contexts and, where needed, stores luma
//    for CfL.
//
// Parameters:
//   cpi        encoder instance (read-only here).
//   tile_data  supplies allow_update_cdf for the current tile.
//   td         thread data; td->mb is the macroblock being coded.
//   t          token output cursor, forwarded to the tokenizers.
//   dry_run    zero for a real encode; non-zero for a dry run.
//              DRY_RUN_COSTCOEFFS additionally accumulates cost into *rate.
//   rate       cost accumulator. It is dereferenced directly in this function
//              only when dry_run == DRY_RUN_COSTCOEFFS; otherwise it is only
//              forwarded to callees.
static void encode_superblock(const AV1_COMP *const cpi, TileDataEnc *tile_data,
                              ThreadData *td, TOKENEXTRA **t, RUN_TYPE dry_run,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int *rate) {
  const AV1_COMMON *const cm = &cpi->common;
  const int num_planes = av1_num_planes(cm);
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MB_MODE_INFO **mi_4x4 = xd->mi;
  MB_MODE_INFO *mbmi = mi_4x4[0];
  const int seg_skip =
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP);
  const int mis = cm->mi_stride;
  const int mi_width = mi_size_wide[bsize];
  const int mi_height = mi_size_high[bsize];
  const int is_inter = is_inter_block(mbmi);

  if (cpi->two_pass_partition_search && x->cb_partition_scan) {
    // Visit every sample region covered by the block. Rows span the block
    // height and columns span the block width; the original code had the two
    // extents swapped, which mis-attributed stats for non-square blocks.
    for (int row = mi_row; row < mi_row + mi_height;
         row += FIRST_PARTITION_PASS_SAMPLE_REGION) {
      for (int col = mi_col; col < mi_col + mi_width;
           col += FIRST_PARTITION_PASS_SAMPLE_REGION) {
        const int index = av1_first_partition_pass_stats_index(row, col);
        FIRST_PARTITION_PASS_STATS *const stats =
            &x->first_partition_pass_stats[index];
        // Increase the counter of data samples.
        ++stats->sample_counts;
        // Increase the counter for ref_frame[0] and ref_frame[1]
        // (counters saturate at 255).
        if (stats->ref0_counts[mbmi->ref_frame[0]] < 255)
          ++stats->ref0_counts[mbmi->ref_frame[0]];
        if (mbmi->ref_frame[1] >= 0 &&
            stats->ref1_counts[mbmi->ref_frame[1]] < 255)
          ++stats->ref1_counts[mbmi->ref_frame[1]];
        if (cpi->sf.use_first_partition_pass_interintra_stats) {
          // Increase the counter for interintra_motion_mode_count
          if (mbmi->motion_mode == 0 && mbmi->ref_frame[1] == INTRA_FRAME &&
              stats->interintra_motion_mode_count[mbmi->ref_frame[0]] < 255) {
            ++stats->interintra_motion_mode_count[mbmi->ref_frame[0]];
          }
        }
      }
    }
  }

  if (!is_inter) {
    xd->cfl.is_chroma_reference =
        is_chroma_reference(mi_row, mi_col, bsize, cm->seq_params.subsampling_x,
                            cm->seq_params.subsampling_y);
    xd->cfl.store_y = store_cfl_required(cm, xd);
    mbmi->skip = 1;
    for (int plane = 0; plane < num_planes; ++plane) {
      av1_encode_intra_block_plane(cpi, x, bsize, plane,
                                   cpi->optimize_seg_arr[mbmi->segment_id],
                                   mi_row, mi_col);
    }

    // If there is at least one lossless segment, force the skip for intra
    // block to be 0, in order to avoid the segment_id being changed in
    // write_segment_id().
    if (!cpi->common.seg.segid_preskip && cpi->common.seg.update_map &&
        cpi->has_lossless_segment)
      mbmi->skip = 0;

    xd->cfl.store_y = 0;
    if (av1_allow_palette(cm->allow_screen_content_tools, bsize)) {
      for (int plane = 0; plane < AOMMIN(2, num_planes); ++plane) {
        if (mbmi->palette_mode_info.palette_size[plane] > 0) {
          if (!dry_run) {
            av1_tokenize_color_map(x, plane, t, bsize, mbmi->tx_size,
                                   PALETTE_MAP, tile_data->allow_update_cdf,
                                   td->counts);
          } else if (dry_run == DRY_RUN_COSTCOEFFS) {
            // Accumulate into the value, not the pointer: `rate += ...` would
            // advance the pointer, drop the color-map cost, and hand a bogus
            // address to av1_update_txb_context() below.
            *rate +=
                av1_cost_color_map(x, plane, bsize, mbmi->tx_size, PALETTE_MAP);
          }
        }
      }
    }

    av1_update_txb_context(cpi, td, dry_run, bsize, rate, mi_row, mi_col,
                           tile_data->allow_update_cdf);
  } else {
    int ref;
    const int is_compound = has_second_ref(mbmi);

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      const YV12_BUFFER_CONFIG *cfg =
          get_ref_frame_yv12_buf(cm, mbmi->ref_frame[ref]);
      assert(IMPLIES(!is_intrabc_block(mbmi), cfg));
      av1_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           xd->block_ref_scale_factors[ref], num_planes);
    }

    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL, bsize, 0,
                                  av1_num_planes(cm) - 1);
    if (mbmi->motion_mode == OBMC_CAUSAL) {
      assert(cpi->oxcf.enable_obmc == 1);
      av1_build_obmc_inter_predictors_sb(cm, xd, mi_row, mi_col);
    }

#if CONFIG_MISMATCH_DEBUG
    if (dry_run == OUTPUT_ENABLED) {
      for (int plane = 0; plane < num_planes; ++plane) {
        const struct macroblockd_plane *pd = &xd->plane[plane];
        int pixel_c, pixel_r;
        mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0,
                        pd->subsampling_x, pd->subsampling_y);
        if (!is_chroma_reference(mi_row, mi_col, bsize, pd->subsampling_x,
                                 pd->subsampling_y))
          continue;
        mismatch_record_block_pre(pd->dst.buf, pd->dst.stride,
                                  cm->current_frame.order_hint, plane, pixel_c,
                                  pixel_r, pd->width, pd->height,
                                  xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH);
      }
    }
#else
    (void)num_planes;
#endif

    av1_encode_sb(cpi, x, bsize, mi_row, mi_col, dry_run);
    av1_tokenize_sb_vartx(cpi, td, t, dry_run, mi_row, mi_col, bsize, rate,
                          tile_data->allow_update_cdf);
  }

  // Transform-size statistics are only gathered on the real encode pass.
  if (!dry_run) {
    if (av1_allow_intrabc(cm) && is_intrabc_block(mbmi)) td->intrabc_used = 1;
    if (cm->tx_mode == TX_MODE_SELECT && !xd->lossless[mbmi->segment_id] &&
        mbmi->sb_type > BLOCK_4X4 && !(is_inter && (mbmi->skip || seg_skip))) {
      if (is_inter) {
        tx_partition_count_update(cm, x, bsize, mi_row, mi_col, td->counts,
                                  tile_data->allow_update_cdf);
      } else {
        if (mbmi->tx_size != max_txsize_rect_lookup[bsize])
          ++x->txb_split_count;
        if (block_signals_txsize(bsize)) {
          const int tx_size_ctx = get_tx_size_context(xd);
          const int32_t tx_size_cat = bsize_to_tx_size_cat(bsize);
          const int depth = tx_size_to_depth(mbmi->tx_size, bsize);
          const int max_depths = bsize_to_max_depth(bsize);

          if (tile_data->allow_update_cdf)
            update_cdf(xd->tile_ctx->tx_size_cdf[tx_size_cat][tx_size_ctx],
                       depth, max_depths + 1);
#if CONFIG_ENTROPY_STATS
          ++td->counts->intra_tx_size[tx_size_cat][tx_size_ctx][depth];
#endif
        }
      }
      assert(IMPLIES(is_rect_tx(mbmi->tx_size), is_rect_tx_allowed(xd, mbmi)));
    } else {
      int i, j;
      TX_SIZE intra_tx_size;
      // The new intra coding scheme requires no change of transform size
      if (is_inter) {
        if (xd->lossless[mbmi->segment_id]) {
          intra_tx_size = TX_4X4;
        } else {
          intra_tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
        }
      } else {
        intra_tx_size = mbmi->tx_size;
      }

      // Write the chosen size to every in-frame mode-info unit of the block.
      for (j = 0; j < mi_height; j++)
        for (i = 0; i < mi_width; i++)
          if (mi_col + i < cm->mi_cols && mi_row + j < cm->mi_rows)
            mi_4x4[mis * j + i]->tx_size = intra_tx_size;

      if (intra_tx_size != max_txsize_rect_lookup[bsize]) ++x->txb_split_count;
    }
  }

  if (cm->tx_mode == TX_MODE_SELECT && block_signals_txsize(mbmi->sb_type) &&
      is_inter && !(mbmi->skip || seg_skip) &&
      !xd->lossless[mbmi->segment_id]) {
    // On a real pass the contexts were already updated by
    // tx_partition_count_update() above; only dry runs need this.
    if (dry_run) tx_partition_set_contexts(cm, xd, bsize, mi_row, mi_col);
  } else {
    TX_SIZE tx_size = mbmi->tx_size;
    // The new intra coding scheme requires no change of transform size
    if (is_inter) {
      if (xd->lossless[mbmi->segment_id]) {
        tx_size = TX_4X4;
      } else {
        tx_size = tx_size_from_tx_mode(bsize, cm->tx_mode);
      }
    } else {
      tx_size = (bsize > BLOCK_4X4) ? tx_size : TX_4X4;
    }
    mbmi->tx_size = tx_size;
    set_txfm_ctxs(tx_size, xd->n4_w, xd->n4_h,
                  (mbmi->skip || seg_skip) && is_inter_block(mbmi), xd);
  }
  // Inter blocks that are not chroma references store their block for CfL
  // when CfL is allowed.
  CFL_CTX *const cfl = &xd->cfl;
  if (is_inter_block(mbmi) &&
      !is_chroma_reference(mi_row, mi_col, bsize, cfl->subsampling_x,
                           cfl->subsampling_y) &&
      is_cfl_allowed(xd)) {
    cfl_store_block(xd, mbmi->sb_type, mbmi->tx_size);
  }
}
6122