1 /*
2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <limits.h>
12 #include <math.h>
13 #include <stdio.h>
14 
15 #include "./vp9_rtcd.h"
16 #include "./vpx_dsp_rtcd.h"
17 #include "./vpx_config.h"
18 
19 #include "vpx_dsp/vpx_dsp_common.h"
20 #include "vpx_ports/mem.h"
21 #include "vpx_ports/vpx_timer.h"
22 #include "vpx_ports/system_state.h"
23 
24 #include "vp9/common/vp9_common.h"
25 #include "vp9/common/vp9_entropy.h"
26 #include "vp9/common/vp9_entropymode.h"
27 #include "vp9/common/vp9_idct.h"
28 #include "vp9/common/vp9_mvref_common.h"
29 #include "vp9/common/vp9_pred_common.h"
30 #include "vp9/common/vp9_quant_common.h"
31 #include "vp9/common/vp9_reconintra.h"
32 #include "vp9/common/vp9_reconinter.h"
33 #include "vp9/common/vp9_seg_common.h"
34 #include "vp9/common/vp9_tile_common.h"
35 
36 #include "vp9/encoder/vp9_aq_complexity.h"
37 #include "vp9/encoder/vp9_aq_cyclicrefresh.h"
38 #include "vp9/encoder/vp9_aq_variance.h"
39 #include "vp9/encoder/vp9_encodeframe.h"
40 #include "vp9/encoder/vp9_encodemb.h"
41 #include "vp9/encoder/vp9_encodemv.h"
42 #include "vp9/encoder/vp9_ethread.h"
43 #include "vp9/encoder/vp9_extend.h"
44 #include "vp9/encoder/vp9_pickmode.h"
45 #include "vp9/encoder/vp9_rd.h"
46 #include "vp9/encoder/vp9_rdopt.h"
47 #include "vp9/encoder/vp9_segmentation.h"
48 #include "vp9/encoder/vp9_tokenize.h"
49 
50 static void encode_superblock(VP9_COMP *cpi, ThreadData * td,
51                               TOKENEXTRA **t, int output_enabled,
52                               int mi_row, int mi_col, BLOCK_SIZE bsize,
53                               PICK_MODE_CONTEXT *ctx);
54 
// Flat block of 64 samples, all equal to 128. It serves as the reference
// when computing the source variance for the purposes of activity masking.
// Eventually this should be replaced by custom no-reference routines, which
// will be faster.
static const uint8_t VP9_VAR_OFFS[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
69 
70 #if CONFIG_VP9_HIGHBITDEPTH
// 16-bit-sample version of the flat reference block, used when the bit depth
// is 8 (or not otherwise recognised): 64 samples, all equal to 128.
static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = {
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128,
  128, 128, 128, 128, 128, 128, 128, 128
};
81 
// Flat reference block for a bit depth of 10: 64 samples, all equal to
// 128 * 4 (= 512).
static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = {
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512,
  512, 512, 512, 512, 512, 512, 512, 512
};
92 
// Flat reference block for a bit depth of 12: 64 samples, all equal to
// 128 * 16 (= 2048).
static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = {
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
  2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
};
103 #endif  // CONFIG_VP9_HIGHBITDEPTH
104 
// Returns the variance of the |bs|-sized block in |ref|, measured against the
// flat VP9_VAR_OFFS block (stride 0) and divided, with rounding, by
// 2^num_pels_log2_lookup[bs].
unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs) {
  unsigned int unused_sse;  // Required by the vf() signature; not read here.
  const unsigned int block_var =
      cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &unused_sse);

  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
113 
114 #if CONFIG_VP9_HIGHBITDEPTH
// High bit-depth counterpart of vp9_get_sby_perpixel_variance(). The flat
// reference block is selected by |bd| (10 or 12; any other value falls back
// to the 8-bit table) and handed to vf() through CONVERT_TO_BYTEPTR. The
// result is divided, with rounding, by 2^num_pels_log2_lookup[bs].
unsigned int vp9_high_get_sby_perpixel_variance(
    VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) {
  const uint16_t *flat_ref;
  unsigned int block_var, sse;

  if (bd == 10)
    flat_ref = VP9_HIGH_VAR_OFFS_10;
  else if (bd == 12)
    flat_ref = VP9_HIGH_VAR_OFFS_12;
  else
    flat_ref = VP9_HIGH_VAR_OFFS_8;

  block_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride,
                                 CONVERT_TO_BYTEPTR(flat_ref), 0, &sse);
  return ROUND_POWER_OF_TWO(block_var, num_pels_log2_lookup[bs]);
}
138 #endif  // CONFIG_VP9_HIGHBITDEPTH
139 
// Returns the variance between the |bs|-sized block in |ref| and the
// co-located block (at mi_row, mi_col) in the Y buffer of the LAST_FRAME
// reference, divided, with rounding, by 2^num_pels_log2_lookup[bs].
static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi,
                                                   const struct buf_2d *ref,
                                                   int mi_row, int mi_col,
                                                   BLOCK_SIZE bs) {
  const YV12_BUFFER_CONFIG *const last_frame =
      get_ref_frame_buffer(cpi, LAST_FRAME);
  uint8_t *colocated_y;
  unsigned int diff_var;
  unsigned int sse;

  assert(last_frame != NULL);
  // Mode-info units are converted to pixels with MI_SIZE.
  colocated_y = last_frame->y_buffer +
      (mi_row * MI_SIZE * last_frame->y_stride + mi_col * MI_SIZE);
  diff_var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, colocated_y,
                                last_frame->y_stride, &sse);
  return ROUND_POWER_OF_TWO(diff_var, num_pels_log2_lookup[bs]);
}
154 
get_rd_var_based_fixed_partition(VP9_COMP * cpi,MACROBLOCK * x,int mi_row,int mi_col)155 static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x,
156                                                    int mi_row,
157                                                    int mi_col) {
158   unsigned int var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src,
159                                                     mi_row, mi_col,
160                                                     BLOCK_64X64);
161   if (var < 8)
162     return BLOCK_64X64;
163   else if (var < 128)
164     return BLOCK_32X32;
165   else if (var < 2048)
166     return BLOCK_16X16;
167   else
168     return BLOCK_8X8;
169 }
170 
171 // Lighter version of set_offsets that only sets the mode info
172 // pointers.
set_mode_info_offsets(VP9_COMMON * const cm,MACROBLOCK * const x,MACROBLOCKD * const xd,int mi_row,int mi_col)173 static INLINE void set_mode_info_offsets(VP9_COMMON *const cm,
174                                          MACROBLOCK *const x,
175                                          MACROBLOCKD *const xd,
176                                          int mi_row,
177                                          int mi_col) {
178   const int idx_str = xd->mi_stride * mi_row + mi_col;
179   xd->mi = cm->mi_grid_visible + idx_str;
180   xd->mi[0] = cm->mi + idx_str;
181   x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col);
182 }
183 
// Prepares |x| (and its embedded MACROBLOCKD) for the |bsize| block at
// (mi_row, mi_col): skip context, mode-info pointers, destination and source
// planes, motion vector limits, frame-edge distances, RD constants, segment
// id / encode breakout, and finally a copy of |tile| into xd->tile.
static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile,
                        MACROBLOCK *const x, int mi_row, int mi_col,
                        BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct segmentation *const seg = &cm->seg;
  const int bw_mi = num_8x8_blocks_wide_lookup[bsize];
  const int bh_mi = num_8x8_blocks_high_lookup[bsize];
  MB_MODE_INFO *mbmi;

  set_skip_context(xd, mi_row, mi_col);

  // xd->mi must be positioned before mbmi is taken from it.
  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
  mbmi = &xd->mi[0]->mbmi;

  // Destination (reconstruction) pointers.
  vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col);

  // Limit values for MV components.
  // Mv beyond the range do not produce new/different prediction block.
  x->mv_row_min = -(((mi_row + bh_mi) * MI_SIZE) + VP9_INTERP_EXTEND);
  x->mv_row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND;
  x->mv_col_min = -(((mi_col + bw_mi) * MI_SIZE) + VP9_INTERP_EXTEND);
  x->mv_col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND;

  // Distance of MB to edge of frame in 1/8th pel units. The block position
  // is expected to be aligned to the block dimensions.
  assert(!(mi_col & (bw_mi - 1)) && !(mi_row & (bh_mi - 1)));
  set_mi_row_col(xd, tile, mi_row, bh_mi, mi_col, bw_mi,
                 cm->mi_rows, cm->mi_cols);

  // Source buffers.
  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  // R/D setup.
  x->rddiv = cpi->rd.RDDIV;
  x->rdmult = cpi->rd.RDMULT;

  // Segment ID and the encode breakout that depends on it.
  if (!seg->enabled) {
    mbmi->segment_id = 0;
    x->encode_breakout = cpi->encode_breakout;
  } else {
    // With VARIANCE_AQ the segment id is not assigned here; the value
    // already stored in mbmi is used as-is.
    if (cpi->oxcf.aq_mode != VARIANCE_AQ) {
      const uint8_t *seg_map = cm->last_frame_seg_map;
      if (seg->update_map)
        seg_map = cpi->segmentation_map;
      mbmi->segment_id = get_segment_id(cm, seg_map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);

    x->encode_breakout = cpi->segment_encode_breakout[mbmi->segment_id];
  }

  // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs()
  xd->tile = *tile;
}
240 
// Points every mode-info grid entry covered by the |bsize| block at
// (mi_row, mi_col) to the block's first entry, xd->mi[0]. Entries that fall
// outside the frame (beyond cm->mi_rows / cm->mi_cols) are left untouched.
static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col,
                                      BLOCK_SIZE bsize) {
  const int cols = num_8x8_blocks_wide_lookup[bsize];
  const int rows = num_8x8_blocks_high_lookup[bsize];
  int r, c;

  for (r = 0; r < rows; ++r) {
    // Once a row is outside the frame, all following rows are too.
    if (mi_row + r >= cm->mi_rows)
      break;
    for (c = 0; c < cols; ++c) {
      // Same reasoning for the columns of this row.
      if (mi_col + c >= cm->mi_cols)
        break;
      xd->mi[r * xd->mi_stride + c] = xd->mi[0];
    }
  }
}
253 
// Records |bsize| as the sb_type of the mode info at (mi_row, mi_col).
// Does nothing when the position lies outside the frame. As a side effect
// the mode-info pointers of |x| / |xd| are moved to that position.
static void set_block_size(VP9_COMP * const cpi,
                           MACROBLOCK *const x,
                           MACROBLOCKD *const xd,
                           int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  VP9_COMMON *const cm = &cpi->common;

  if (mi_col >= cm->mi_cols || mi_row >= cm->mi_rows)
    return;

  set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
  xd->mi[0]->mbmi.sb_type = bsize;
}
264 
// Accumulated statistics for one region of the variance tree.
typedef struct {
  int64_t sum_square_error;  // Sum of squared errors over the region.
  int64_t sum_error;         // Sum of errors over the region.
  int log2_count;            // log2 of the number of samples accumulated.
  int variance;              // Filled in on demand by get_variance().
} var;

// Statistics of a square block for each way of covering it: whole block,
// two horizontal halves, two vertical halves.
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;

// Variance tree nodes. Every node stores its own partition statistics plus
// its four children; the children of the smallest node are plain |var|
// leaves.
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

// Size-independent view of a tree node (see tree_to_node()): a pointer to the
// node's partition statistics and to the |var| of each of its four children.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;

// Levels of the variance tree, from 16x16 up to 64x64.
typedef enum {
  V16X16,
  V32X32,
  V64X64
} TREE_LEVEL;
313 
// Fills |node| with pointers into the variance tree |data|, which is
// interpreted according to |bsize| (BLOCK_64X64 -> v64x64, ...,
// BLOCK_4X4 -> v4x4). node->split[i] points at the |none| statistics of
// child i, or at the leaf |var| itself for BLOCK_4X4. Any other block size
// trips an assert and leaves node->part_variances NULL.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  int k;

  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_64X64: {
      v64x64 *const tree = (v64x64 *)data;
      node->part_variances = &tree->part_variances;
      for (k = 0; k < 4; ++k)
        node->split[k] = &tree->split[k].part_variances.none;
      break;
    }
    case BLOCK_32X32: {
      v32x32 *const tree = (v32x32 *)data;
      node->part_variances = &tree->part_variances;
      for (k = 0; k < 4; ++k)
        node->split[k] = &tree->split[k].part_variances.none;
      break;
    }
    case BLOCK_16X16: {
      v16x16 *const tree = (v16x16 *)data;
      node->part_variances = &tree->part_variances;
      for (k = 0; k < 4; ++k)
        node->split[k] = &tree->split[k].part_variances.none;
      break;
    }
    case BLOCK_8X8: {
      v8x8 *const tree = (v8x8 *)data;
      node->part_variances = &tree->part_variances;
      for (k = 0; k < 4; ++k)
        node->split[k] = &tree->split[k].part_variances.none;
      break;
    }
    case BLOCK_4X4: {
      // Leaf level: the children are |var| entries, not sub-trees.
      v4x4 *const tree = (v4x4 *)data;
      node->part_variances = &tree->part_variances;
      for (k = 0; k < 4; ++k)
        node->split[k] = &tree->split[k];
      break;
    }
    default:
      assert(0);
      break;
  }
}
359 
360 // Set variance values given sum square error, sum error, count.
fill_variance(int64_t s2,int64_t s,int c,var * v)361 static void fill_variance(int64_t s2, int64_t s, int c, var *v) {
362   v->sum_square_error = s2;
363   v->sum_error = s;
364   v->log2_count = c;
365 }
366 
// Computes v->variance from the accumulated sums:
//   256 * (sum_square_error - sum_error^2 / N) / N, with N = 2^log2_count,
// where both divisions are done as right shifts.
static void get_variance(var *v) {
  const int64_t sum_sq_over_n =
      (v->sum_error * v->sum_error) >> v->log2_count;
  const int64_t scaled = 256 * (v->sum_square_error - sum_sq_over_n);

  v->variance = (int)(scaled >> v->log2_count);
}
371 
// Combines two equally sized regions |a| and |b| into |r|: the sums are
// added and the log2 sample count grows by one.
static void sum_2_variances(const var *a, const var *b, var *r) {
  const int64_t total_sse = a->sum_square_error + b->sum_square_error;
  const int64_t total_sum = a->sum_error + b->sum_error;

  assert(a->log2_count == b->log2_count);
  fill_variance(total_sse, total_sum, a->log2_count + 1, r);
}
377 
// Derives the partition statistics of the |bsize| node stored in |data| from
// its four children: the two horizontal halves (children 0+1 and 2+3), the
// two vertical halves (children 0+2 and 1+3), and the whole block (sum of
// the two vertical halves).
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  partition_variance *pv;

  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  pv = node.part_variances;

  sum_2_variances(node.split[0], node.split[1], &pv->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &pv->horz[1]);
  sum_2_variances(node.split[0], node.split[2], &pv->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &pv->vert[1]);
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}
389 
// Tries to settle the partitioning of the |bsize| block at (mi_row, mi_col)
// from the variance tree node |data|.
// Returns 1 when a block size was written (the whole block, or two
// vertical / horizontal halves) and 0 when the caller has to split further.
// A |force_split| of 1, or a |bsize| below |bsize_min|, always returns 0.
static int set_vt_partitioning(VP9_COMP *cpi,
                               MACROBLOCK *const x,
                               MACROBLOCKD *const xd,
                               void *data,
                               BLOCK_SIZE bsize,
                               int mi_row,
                               int mi_col,
                               int64_t threshold,
                               BLOCK_SIZE bsize_min,
                               int force_split) {
  VP9_COMMON *const cm = &cpi->common;
  const int block_width = num_8x8_blocks_wide_lookup[bsize];
  const int block_height = num_8x8_blocks_high_lookup[bsize];
  const int is_key_frame = (cm->frame_type == KEY_FRAME);
  const int low_res = (cm->width <= 352 && cm->height <= 288);
  // Whether the right / bottom half of the block starts inside the frame.
  const int right_half_in_frame = mi_col + block_width / 2 < cm->mi_cols;
  const int bottom_half_in_frame = mi_row + block_height / 2 < cm->mi_rows;
  variance_node vt;
  partition_variance *pv;

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);
  pv = vt.part_variances;

  if (force_split == 1)
    return 0;
  if (bsize < bsize_min)
    return 0;

  // The whole-block variance is computed here only for low resolutions and
  // key frames. Otherwise it is taken as already computed (per the original
  // note: it was needed to set the force_split).
  if (low_res || is_key_frame)
    get_variance(&pv->none);

  // For key frame: take split for bsize above 32X32 or very high variance.
  // This check does not apply at the minimum block size.
  if (bsize > bsize_min && is_key_frame &&
      (bsize > BLOCK_32X32 || pv->none.variance > (threshold << 4)))
    return 0;

  // If variance is low, take the bsize (no split).
  if (right_half_in_frame && bottom_half_in_frame &&
      pv->none.variance < threshold) {
    set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
    return 1;
  }

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling) there is no
  // check for vert/horiz split, as there are too few samples for variance.
  if (bsize == bsize_min)
    return 0;

  // Check vertical split.
  if (bottom_half_in_frame) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT);
    get_variance(&pv->vert[0]);
    get_variance(&pv->vert[1]);
    if (pv->vert[0].variance < threshold &&
        pv->vert[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
      return 1;
    }
  }

  // Check horizontal split.
  if (right_half_in_frame) {
    const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ);
    get_variance(&pv->horz[0]);
    get_variance(&pv->horz[1]);
    if (pv->horz[0].variance < threshold &&
        pv->horz[1].variance < threshold &&
        get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) {
      set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
      set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
      return 1;
    }
  }

  return 0;
}
475 
476 // Set the variance split thresholds for following the block sizes:
477 // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16,
478 // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is
479 // currently only used on key frame.
set_vbp_thresholds(VP9_COMP * cpi,int64_t thresholds[],int q)480 static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q) {
481   VP9_COMMON *const cm = &cpi->common;
482   const int is_key_frame = (cm->frame_type == KEY_FRAME);
483   const int threshold_multiplier = is_key_frame ? 20 : 1;
484   const int64_t threshold_base = (int64_t)(threshold_multiplier *
485       cpi->y_dequant[q][1]);
486   if (is_key_frame) {
487     thresholds[0] = threshold_base;
488     thresholds[1] = threshold_base >> 2;
489     thresholds[2] = threshold_base >> 2;
490     thresholds[3] = threshold_base << 2;
491   } else {
492     thresholds[1] = threshold_base;
493     if (cm->width <= 352 && cm->height <= 288) {
494       thresholds[0] = threshold_base >> 2;
495       thresholds[2] = threshold_base << 3;
496     } else {
497       thresholds[0] = threshold_base;
498       thresholds[1] = (5 * threshold_base) >> 2;
499       if (cm->width >= 1920 && cm->height >= 1080)
500         thresholds[1] = (7 * threshold_base) >> 2;
501       thresholds[2] = threshold_base << cpi->oxcf.speed;
502     }
503   }
504 }
505 
// Updates the variance-based partitioning parameters stored in |cpi| for
// quantizer index |q|. Does nothing unless the partition search type is
// VAR_BASED_PARTITION or REFERENCE_PARTITION.
void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = (cm->frame_type == KEY_FRAME);

  if (sf->partition_search_type != VAR_BASED_PARTITION &&
      sf->partition_search_type != REFERENCE_PARTITION)
    return;

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q);

  // The thresholds below are not changed locally.
  if (is_key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else {
    if (cm->width <= 352 && cm->height <= 288) {
      cpi->vbp_threshold_sad = 100;
    } else {
      // Twice the dequant value, but never less than 1000.
      const int dequant_x2 = cpi->y_dequant[q][1] << 1;
      cpi->vbp_threshold_sad = dequant_x2 > 1000 ? dequant_x2 : 1000;
    }
    cpi->vbp_bsize_min = BLOCK_16X16;
  }
  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}
530 
// Compute the minmax over the 8x8 subblocks of the 16x16 block at
// (x16_idx, y16_idx). For every sub-block inside the
// |pixels_wide| x |pixels_high| area, the minmax helper reports a (min, max)
// pair for |s| versus |d|. The return value is the largest (max - min) minus
// the smallest (max - min) over those sub-blocks.
// NOTE(review): the smallest range starts at 255, which looks sized for
// 8-bit data - confirm it is adequate for the high bit-depth path. If no
// sub-block is inside the area, the result is 0 - 255.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx,
#if CONFIG_VP9_HIGHBITDEPTH
                              int highbd_flag,
#endif
                              int pixels_wide,
                              int pixels_high) {
  int largest_range = 0;
  int smallest_range = 255;
  int k;

  // Loop over the 4 8x8 subblocks.
  for (k = 0; k < 4; k++) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    const uint8_t *src8;
    const uint8_t *dst8;
    int min = 0;
    int max = 0;
    int range;

    // Sub-blocks starting outside the visible area do not contribute.
    if (x8_idx >= pixels_wide || y8_idx >= pixels_high)
      continue;

    src8 = s + y8_idx * sp + x8_idx;
    dst8 = d + y8_idx * dp + x8_idx;
#if CONFIG_VP9_HIGHBITDEPTH
    if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
      vp9_highbd_minmax_8x8(src8, sp, dst8, dp, &min, &max);
    } else {
      vp9_minmax_8x8(src8, sp, dst8, dp, &min, &max);
    }
#else
    vp9_minmax_8x8(src8, sp, dst8, dp, &min, &max);
#endif
    range = max - min;
    if (range > largest_range)
      largest_range = range;
    if (range < smallest_range)
      smallest_range = range;
  }
  return largest_range - smallest_range;
}
572 
// Fills the four 4x4 leaves of the 8x8 node |vst| located at
// (x8_idx, y8_idx). Each leaf gets sum = avg(source 4x4) - avg(reference
// 4x4) and sse = sum^2, with a log2 count of 0. On key frames the reference
// average is fixed at 128 and |d| is not read. Leaves starting outside the
// |pixels_wide| x |pixels_high| area get zero sum and sse.
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide,
                                 int pixels_high,
                                 int is_key_frame) {
  int k;

  for (k = 0; k < 4; k++) {
    const int x4_idx = x8_idx + ((k & 1) << 2);
    const int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;

    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      const uint8_t *const src4 = s + y4_idx * sp + x4_idx;
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vp9_highbd_avg_4x4(src4, sp);
        if (!is_key_frame)
          d_avg = vp9_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      } else {
        s_avg = vp9_avg_4x4(src4, sp);
        if (!is_key_frame)
          d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      }
#else
      s_avg = vp9_avg_4x4(src4, sp);
      if (!is_key_frame)
        d_avg = vp9_avg_4x4(d + y4_idx * dp + x4_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}
611 
// Fills the four 8x8 children of the 16x16 node |vst| located at
// (x16_idx, y16_idx). Each child gets sum = avg(source 8x8) - avg(reference
// 8x8) and sse = sum^2, with a log2 count of 0. On key frames the reference
// average is fixed at 128 and |d| is not read. Children starting outside the
// |pixels_wide| x |pixels_high| area get zero sum and sse.
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
#if CONFIG_VP9_HIGHBITDEPTH
                                 int highbd_flag,
#endif
                                 int pixels_wide,
                                 int pixels_high,
                                 int is_key_frame) {
  int k;

  for (k = 0; k < 4; k++) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;

    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      const uint8_t *const src8 = s + y8_idx * sp + x8_idx;
      int s_avg;
      int d_avg = 128;
#if CONFIG_VP9_HIGHBITDEPTH
      if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) {
        s_avg = vp9_highbd_avg_8x8(src8, sp);
        if (!is_key_frame)
          d_avg = vp9_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      } else {
        s_avg = vp9_avg_8x8(src8, sp);
        if (!is_key_frame)
          d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      }
#else
      s_avg = vp9_avg_8x8(src8, sp);
      if (!is_key_frame)
        d_avg = vp9_avg_8x8(d + y8_idx * dp + x8_idx, dp);
#endif
      sum = s_avg - d_avg;
      sse = sum * sum;
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}
650 
651 // This function chooses partitioning based on the variance between source and
652 // reconstructed last, where variance is computed for down-sampled inputs.
choose_partitioning(VP9_COMP * cpi,const TileInfo * const tile,MACROBLOCK * x,int mi_row,int mi_col)653 static int choose_partitioning(VP9_COMP *cpi,
654                                 const TileInfo *const tile,
655                                 MACROBLOCK *x,
656                                 int mi_row, int mi_col) {
657   VP9_COMMON * const cm = &cpi->common;
658   MACROBLOCKD *xd = &x->e_mbd;
659   int i, j, k, m;
660   v64x64 vt;
661   v16x16 vt2[16];
662   int force_split[21];
663   uint8_t *s;
664   const uint8_t *d;
665   int sp;
666   int dp;
667   int pixels_wide = 64, pixels_high = 64;
668   int64_t thresholds[4] = {cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
669       cpi->vbp_thresholds[2], cpi->vbp_thresholds[3]};
670 
671   // Always use 4x4 partition for key frame.
672   const int is_key_frame = (cm->frame_type == KEY_FRAME);
673   const int use_4x4_partition = is_key_frame;
674   const int low_res = (cm->width <= 352 && cm->height <= 288);
675   int variance4x4downsample[16];
676 
677   int segment_id = CR_SEGMENT_ID_BASE;
678   if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) {
679     const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map :
680                                                     cm->last_frame_seg_map;
681     segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
682 
683     if (cyclic_refresh_segment_id_boosted(segment_id)) {
684       int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
685       set_vbp_thresholds(cpi, thresholds, q);
686     }
687   }
688 
689   set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64);
690 
691   if (xd->mb_to_right_edge < 0)
692     pixels_wide += (xd->mb_to_right_edge >> 3);
693   if (xd->mb_to_bottom_edge < 0)
694     pixels_high += (xd->mb_to_bottom_edge >> 3);
695 
696   s = x->plane[0].src.buf;
697   sp = x->plane[0].src.stride;
698 
699   if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) &&
700       cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) {
701     // In the case of spatial/temporal scalable coding, the assumption here is
702     // that the temporal reference frame will always be of type LAST_FRAME.
703     // TODO(marpan): If that assumption is broken, we need to revisit this code.
704     MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi;
705     unsigned int uv_sad;
706     const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME);
707 
708     const YV12_BUFFER_CONFIG *yv12_g = NULL;
709     unsigned int y_sad, y_sad_g;
710     const BLOCK_SIZE bsize = BLOCK_32X32
711         + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows);
712 
713     assert(yv12 != NULL);
714 
715     if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) {
716       // For now, GOLDEN will not be used for non-zero spatial layers, since
717       // it may not be a temporal reference.
718       yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
719     }
720 
721     if (yv12_g && yv12_g != yv12 &&
722        (cpi->ref_frame_flags & VP9_GOLD_FLAG)) {
723       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
724                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
725       y_sad_g = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf,
726                                        x->plane[0].src.stride,
727                                        xd->plane[0].pre[0].buf,
728                                        xd->plane[0].pre[0].stride);
729     } else {
730       y_sad_g = UINT_MAX;
731     }
732 
733     vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
734                          &cm->frame_refs[LAST_FRAME - 1].sf);
735     mbmi->ref_frame[0] = LAST_FRAME;
736     mbmi->ref_frame[1] = NONE;
737     mbmi->sb_type = BLOCK_64X64;
738     mbmi->mv[0].as_int = 0;
739     mbmi->interp_filter = BILINEAR;
740 
741     y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col);
742     if (y_sad_g < y_sad) {
743       vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col,
744                            &cm->frame_refs[GOLDEN_FRAME - 1].sf);
745       mbmi->ref_frame[0] = GOLDEN_FRAME;
746       mbmi->mv[0].as_int = 0;
747       y_sad = y_sad_g;
748     } else {
749       x->pred_mv[LAST_FRAME] = mbmi->mv[0].as_mv;
750     }
751 
752     vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64);
753 
754     for (i = 1; i <= 2; ++i) {
755       struct macroblock_plane  *p = &x->plane[i];
756       struct macroblockd_plane *pd = &xd->plane[i];
757       const BLOCK_SIZE bs = get_plane_block_size(bsize, pd);
758 
759       if (bs == BLOCK_INVALID)
760         uv_sad = UINT_MAX;
761       else
762         uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride,
763                                      pd->dst.buf, pd->dst.stride);
764 
765       x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2);
766     }
767 
768     d = xd->plane[0].dst.buf;
769     dp = xd->plane[0].dst.stride;
770 
771     // If the y_sad is very small, take 64x64 as partition and exit.
772     // Don't check on boosted segment for now, as 64x64 is suppressed there.
773     if (segment_id == CR_SEGMENT_ID_BASE &&
774         y_sad < cpi->vbp_threshold_sad) {
775       const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64];
776       const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64];
777       if (mi_col + block_width / 2 < cm->mi_cols &&
778           mi_row + block_height / 2 < cm->mi_rows) {
779         set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64);
780         return 0;
781       }
782     }
783   } else {
784     d = VP9_VAR_OFFS;
785     dp = 0;
786 #if CONFIG_VP9_HIGHBITDEPTH
787     if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
788       switch (xd->bd) {
789         case 10:
790           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10);
791           break;
792         case 12:
793           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12);
794           break;
795         case 8:
796         default:
797           d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8);
798           break;
799       }
800     }
801 #endif  // CONFIG_VP9_HIGHBITDEPTH
802   }
803 
804   // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks,
805   // 5-20 for the 16x16 blocks.
806   force_split[0] = 0;
807   // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
808   // for splits.
809   for (i = 0; i < 4; i++) {
810     const int x32_idx = ((i & 1) << 5);
811     const int y32_idx = ((i >> 1) << 5);
812     const int i2 = i << 2;
813     force_split[i + 1] = 0;
814     for (j = 0; j < 4; j++) {
815       const int x16_idx = x32_idx + ((j & 1) << 4);
816       const int y16_idx = y32_idx + ((j >> 1) << 4);
817       const int split_index = 5 + i2 + j;
818       v16x16 *vst = &vt.split[i].split[j];
819       force_split[split_index] = 0;
820       variance4x4downsample[i2 + j] = 0;
821       if (!is_key_frame) {
822         fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst,
823 #if CONFIG_VP9_HIGHBITDEPTH
824                             xd->cur_buf->flags,
825 #endif
826                             pixels_wide,
827                             pixels_high,
828                             is_key_frame);
829         fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16);
830         get_variance(&vt.split[i].split[j].part_variances.none);
831         if (vt.split[i].split[j].part_variances.none.variance >
832             thresholds[2]) {
833           // 16X16 variance is above threshold for split, so force split to 8x8
834           // for this 16x16 block (this also forces splits for upper levels).
835           force_split[split_index] = 1;
836           force_split[i + 1] = 1;
837           force_split[0] = 1;
838         } else if (vt.split[i].split[j].part_variances.none.variance >
839                    thresholds[1] &&
840                    !cyclic_refresh_segment_id_boosted(segment_id)) {
841           // We have some nominal amount of 16x16 variance (based on average),
842           // compute the minmax over the 8x8 sub-blocks, and if above threshold,
843           // force split to 8x8 block for this 16x16 block.
844           int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
845 #if CONFIG_VP9_HIGHBITDEPTH
846                                           xd->cur_buf->flags,
847 #endif
848                                           pixels_wide, pixels_high);
849           if (minmax > cpi->vbp_threshold_minmax) {
850             force_split[split_index] = 1;
851             force_split[i + 1] = 1;
852             force_split[0] = 1;
853           }
854         }
855       }
856       // TODO(marpan): There is an issue with variance based on 4x4 average in
857       // svc mode, don't allow it for now.
858       if (is_key_frame || (low_res && !cpi->use_svc &&
859           vt.split[i].split[j].part_variances.none.variance >
860           (thresholds[1] << 1))) {
861         force_split[split_index] = 0;
862         // Go down to 4x4 down-sampling for variance.
863         variance4x4downsample[i2 + j] = 1;
864         for (k = 0; k < 4; k++) {
865           int x8_idx = x16_idx + ((k & 1) << 3);
866           int y8_idx = y16_idx + ((k >> 1) << 3);
867           v8x8 *vst2 = is_key_frame ? &vst->split[k] :
868               &vt2[i2 + j].split[k];
869           fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
870 #if CONFIG_VP9_HIGHBITDEPTH
871                                xd->cur_buf->flags,
872 #endif
873                                pixels_wide,
874                                pixels_high,
875                                is_key_frame);
876         }
877       }
878     }
879   }
880 
881   // Fill the rest of the variance tree by summing split partition values.
882   for (i = 0; i < 4; i++) {
883     const int i2 = i << 2;
884     for (j = 0; j < 4; j++) {
885       if (variance4x4downsample[i2 + j] == 1) {
886         v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] :
887             &vt.split[i].split[j];
888         for (m = 0; m < 4; m++)
889           fill_variance_tree(&vtemp->split[m], BLOCK_8X8);
890         fill_variance_tree(vtemp, BLOCK_16X16);
891       }
892     }
893     fill_variance_tree(&vt.split[i], BLOCK_32X32);
894     // If variance of this 32x32 block is above the threshold, force the block
895     // to split. This also forces a split on the upper (64x64) level.
896     if (!force_split[i + 1]) {
897       get_variance(&vt.split[i].part_variances.none);
898       if (vt.split[i].part_variances.none.variance > thresholds[1]) {
899         force_split[i + 1] = 1;
900         force_split[0] = 1;
901       }
902     }
903   }
904   if (!force_split[0]) {
905     fill_variance_tree(&vt, BLOCK_64X64);
906     get_variance(&vt.part_variances.none);
907   }
908 
909   // Now go through the entire structure, splitting every block size until
910   // we get to one that's got a variance lower than our threshold.
911   if ( mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows ||
912       !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col,
913                            thresholds[0], BLOCK_16X16, force_split[0])) {
914     for (i = 0; i < 4; ++i) {
915       const int x32_idx = ((i & 1) << 2);
916       const int y32_idx = ((i >> 1) << 2);
917       const int i2 = i << 2;
918       if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32,
919                                (mi_row + y32_idx), (mi_col + x32_idx),
920                                thresholds[1], BLOCK_16X16,
921                                force_split[i + 1])) {
922         for (j = 0; j < 4; ++j) {
923           const int x16_idx = ((j & 1) << 1);
924           const int y16_idx = ((j >> 1) << 1);
925           // For inter frames: if variance4x4downsample[] == 1 for this 16x16
926           // block, then the variance is based on 4x4 down-sampling, so use vt2
927           // in set_vt_partioning(), otherwise use vt.
928           v16x16 *vtemp = (!is_key_frame &&
929                            variance4x4downsample[i2 + j] == 1) ?
930                            &vt2[i2 + j] : &vt.split[i].split[j];
931           if (!set_vt_partitioning(cpi, x, xd, vtemp, BLOCK_16X16,
932                                    mi_row + y32_idx + y16_idx,
933                                    mi_col + x32_idx + x16_idx,
934                                    thresholds[2],
935                                    cpi->vbp_bsize_min,
936                                    force_split[5 + i2  + j])) {
937             for (k = 0; k < 4; ++k) {
938               const int x8_idx = (k & 1);
939               const int y8_idx = (k >> 1);
940               if (use_4x4_partition) {
941                 if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k],
942                                          BLOCK_8X8,
943                                          mi_row + y32_idx + y16_idx + y8_idx,
944                                          mi_col + x32_idx + x16_idx + x8_idx,
945                                          thresholds[3], BLOCK_8X8, 0)) {
946                   set_block_size(cpi, x, xd,
947                                  (mi_row + y32_idx + y16_idx + y8_idx),
948                                  (mi_col + x32_idx + x16_idx + x8_idx),
949                                  BLOCK_4X4);
950                 }
951               } else {
952                 set_block_size(cpi, x, xd,
953                                (mi_row + y32_idx + y16_idx + y8_idx),
954                                (mi_col + x32_idx + x16_idx + x8_idx),
955                                BLOCK_8X8);
956               }
957             }
958           }
959         }
960       }
961     }
962   }
963   return 0;
964 }
965 
// Commits the mode decision held in |ctx| for the block at (mi_row, mi_col).
//
// Always performed:
//  - copies ctx->mic into the MODE_INFO at xd->mi[0] and ctx->mbmi_ext into
//    *x->mbmi_ext;
//  - when segmentation is enabled, refreshes the segment id for
//    COMPLEXITY_AQ and calls the cyclic-refresh segment update for
//    CYCLIC_REFRESH_AQ;
//  - selects which ctx coefficient buffers the planes use;
//  - points every mi slot covered by |bsize| that passes the
//    mb_to_right_edge / mb_to_bottom_edge test at the block's MODE_INFO;
//  - copies ctx->skip and ctx->zcoeff_blk into |x|.
//
// Only when |output_enabled| is non-zero: updates mode-chosen counters (with
// CONFIG_INTERNAL_STATS), mv / interp-filter counts and rd diffs, and writes
// the reference frames and motion vectors into cm->cur_frame->mvs.
static void update_state(VP9_COMP *cpi, ThreadData *td,
                         PICK_MODE_CONTEXT *ctx,
                         int mi_row, int mi_col, BLOCK_SIZE bsize,
                         int output_enabled) {
  int i, x_idx, y;
  VP9_COMMON *const cm = &cpi->common;
  RD_COUNTS *const rdc = &td->rd_counts;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  struct macroblock_plane *const p = x->plane;
  struct macroblockd_plane *const pd = xd->plane;
  MODE_INFO *mi = &ctx->mic;
  MB_MODE_INFO *const mbmi = &xd->mi[0]->mbmi;
  MODE_INFO *mi_addr = xd->mi[0];
  const struct segmentation *const seg = &cm->seg;
  const int bw = num_8x8_blocks_wide_lookup[mi->mbmi.sb_type];
  const int bh = num_8x8_blocks_high_lookup[mi->mbmi.sb_type];
  // Clamp the block extent (in mi units) to what is left of the frame.
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
  MV_REF *const frame_mvs =
      cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
  int w, h;

  const int mis = cm->mi_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  int max_plane;

  assert(mi->mbmi.sb_type == bsize);

  // From here on |mbmi| (which aliases mi_addr->mbmi) reflects the picked
  // mode.
  *mi_addr = *mi;
  *x->mbmi_ext = ctx->mbmi_ext;

  // If segmentation in use
  if (seg->enabled) {
    // For in frame complexity AQ copy the segment id from the segment map.
    if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
                                                 : cm->last_frame_seg_map;
      mi_addr->mbmi.segment_id =
        get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    // Else for cyclic refresh mode update the segment map, set the segment id
    // and then update the quantizer.
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_update_segment(cpi, &xd->mi[0]->mbmi, mi_row,
                                        mi_col, bsize, ctx->rate, ctx->dist,
                                        x->skip);
    }
  }

  // Planes [0, max_plane) use buffer set 1 of the context, the remaining
  // planes use buffer set 2. Inter blocks use set 1 for every plane.
  max_plane = is_inter_block(mbmi) ? MAX_MB_PLANE : 1;
  for (i = 0; i < max_plane; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][1];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][1];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1];
    p[i].eobs = ctx->eobs_pbuf[i][1];
  }

  for (i = max_plane; i < MAX_MB_PLANE; ++i) {
    p[i].coeff = ctx->coeff_pbuf[i][2];
    p[i].qcoeff = ctx->qcoeff_pbuf[i][2];
    pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2];
    p[i].eobs = ctx->eobs_pbuf[i][2];
  }

  // Restore the coding context of the MB to that that was in place
  // when the mode was picked for it
  for (y = 0; y < mi_height; y++) {
    for (x_idx = 0; x_idx < mi_width; x_idx++) {
      if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx
        && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) {
        xd->mi[x_idx + y * mis] = mi_addr;
      }
    }
  }

  if (cpi->oxcf.aq_mode)
    vp9_init_plane_quantizers(cpi, x);

  // For sub-8x8 inter blocks the block-level mv is taken from the last
  // (index 3) sub-block.
  if (is_inter_block(mbmi) && mbmi->sb_type < BLOCK_8X8) {
    mbmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int;
    mbmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int;
  }

  x->skip = ctx->skip;
  memcpy(x->zcoeff_blk[mbmi->tx_size], ctx->zcoeff_blk,
         sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk);

  // Everything below only updates statistics and the frame mv buffer.
  if (!output_enabled)
    return;

#if CONFIG_INTERNAL_STATS
  if (frame_is_intra_only(cm)) {
    static const int kf_mode_index[] = {
      THR_DC        /*DC_PRED*/,
      THR_V_PRED    /*V_PRED*/,
      THR_H_PRED    /*H_PRED*/,
      THR_D45_PRED  /*D45_PRED*/,
      THR_D135_PRED /*D135_PRED*/,
      THR_D117_PRED /*D117_PRED*/,
      THR_D153_PRED /*D153_PRED*/,
      THR_D207_PRED /*D207_PRED*/,
      THR_D63_PRED  /*D63_PRED*/,
      THR_TM        /*TM_PRED*/,
    };
    ++cpi->mode_chosen_counts[kf_mode_index[mbmi->mode]];
  } else {
    // Note how often each mode chosen as best
    ++cpi->mode_chosen_counts[ctx->best_mode_index];
  }
#endif
  if (!frame_is_intra_only(cm)) {
    if (is_inter_block(mbmi)) {
      vp9_update_mv_count(td);

      if (cm->interp_filter == SWITCHABLE) {
        // Named distinctly so it does not shadow the |ctx| parameter, which
        // is used again just below.
        const int interp_ctx = vp9_get_pred_context_switchable_interp(xd);
        ++td->counts->switchable_interp[interp_ctx][mbmi->interp_filter];
      }
    }

    rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff;
    rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff;
    rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      rdc->filter_diff[i] += ctx->best_filter_diff[i];
  }

  // Record the block's reference frames and mvs for every covered mi
  // position, clipped to the frame by x_mis / y_mis.
  for (h = 0; h < y_mis; ++h) {
    MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols;
    for (w = 0; w < x_mis; ++w) {
      MV_REF *const mv = frame_mv + w;
      mv->ref_frame[0] = mi->mbmi.ref_frame[0];
      mv->ref_frame[1] = mi->mbmi.ref_frame[1];
      mv->mv[0].as_int = mi->mbmi.mv[0].as_int;
      mv->mv[1].as_int = mi->mbmi.mv[1].as_int;
    }
  }
}
1105 
// Makes |src| the current frame buffer of |x| and sets up the source
// (x->plane[].src) of every plane for the block position (mi_row, mi_col),
// using each plane's own subsampling factors.
void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG *src,
                          int mi_row, int mi_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  // Plane order is Y, U, V; both chroma planes share uv_stride.
  uint8_t *const plane_buf[3] = {
    src->y_buffer, src->u_buffer, src->v_buffer
  };
  const int plane_stride[3] = {
    src->y_stride, src->uv_stride, src->uv_stride
  };
  int plane;

  // Set current frame pointer.
  xd->cur_buf = src;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&x->plane[plane].src, plane_buf[plane],
                     plane_stride[plane], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
1120 
// Fills the current block's mode info with a fixed skip configuration:
// ZEROMV from LAST_FRAME with a zero motion vector, no second reference,
// skip set, and the largest transform size allowed by both |bsize| and
// |tx_mode|. The interpolation filter is taken from the above neighbour if
// available, else from the left neighbour, else EIGHTTAP. |rd_cost| is reset
// via vp9_rd_cost_init().
static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode,
                                   RD_COST *rd_cost, BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const cur_mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &cur_mi->mbmi;
  INTERP_FILTER neighbor_filter = EIGHTTAP;

  if (xd->up_available) {
    neighbor_filter = xd->mi[-xd->mi_stride]->mbmi.interp_filter;
  } else if (xd->left_available) {
    neighbor_filter = xd->mi[-1]->mbmi.interp_filter;
  }

  // Block geometry and transform size.
  mbmi->sb_type = bsize;
  mbmi->tx_size =
      VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]);

  // Prediction: zero motion from LAST_FRAME, single reference.
  mbmi->mode = ZEROMV;
  mbmi->uv_mode = DC_PRED;
  mbmi->ref_frame[0] = LAST_FRAME;
  mbmi->ref_frame[1] = NONE;
  mbmi->mv[0].as_int = 0;
  mbmi->interp_filter = neighbor_filter;
  cur_mi->bmi[0].as_mv[0].as_int = 0;

  // No residual is coded.
  mbmi->skip = 1;
  x->skip = 1;

  vp9_rd_cost_init(rd_cost);
}
1150 
// Re-initialises the plane quantizers for |x| and returns the rd multiplier
// computed for |segment_id|: the segment's qindex (derived from
// cm->base_qindex) plus cm->y_dc_delta_q is passed to vp9_compute_rd_mult().
static int set_segment_rdmult(VP9_COMP *const cpi,
                               MACROBLOCK *const x,
                               int8_t segment_id) {
  VP9_COMMON *const cm = &cpi->common;
  int rd_qindex;

  vp9_init_plane_quantizers(cpi, x);
  vpx_clear_system_state();

  rd_qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex);
  rd_qindex += cm->y_dc_delta_q;

  return vp9_compute_rd_mult(cpi, rd_qindex);
}
1162 
// Runs the rate-distortion mode search for one block of size |bsize| at
// (mi_row, mi_col). The outcome is left in |rd_cost|; its rate and dist are
// also copied into |ctx|. x->rdmult may be overridden per segment for the AQ
// modes while the search runs and is restored to its entry value before
// returning.
static void rd_pick_sb_modes(VP9_COMP *cpi,
                             TileDataEnc *tile_data,
                             MACROBLOCK *const x,
                             int mi_row, int mi_col, RD_COST *rd_cost,
                             BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx,
                             int64_t best_rd) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  TileInfo *const tile_info = &tile_data->tile_info;
  const AQ_MODE aq_mode = cpi->oxcf.aq_mode;
  MB_MODE_INFO *mbmi;
  int plane;
  int saved_rdmult;

  vpx_clear_system_state();

  // Use the lower precision, but faster, 32x32 fdct for mode selection.
  x->use_lp32x32fdct = 1;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
  mbmi = &xd->mi[0]->mbmi;
  mbmi->sb_type = bsize;

  // The search works on buffer set 0 of the pick-mode context.
  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    x->plane[plane].coeff = ctx->coeff_pbuf[plane][0];
    x->plane[plane].qcoeff = ctx->qcoeff_pbuf[plane][0];
    xd->plane[plane].dqcoeff = ctx->dqcoeff_pbuf[plane][0];
    x->plane[plane].eobs = ctx->eobs_pbuf[plane][0];
  }
  ctx->is_coded = 0;
  ctx->skippable = 0;
  ctx->pred_pixel_ready = 0;
  x->skip_recode = 0;

  // Set to zero to make sure we do not use the previous encoded frame stats
  mbmi->skip = 0;

#if CONFIG_VP9_HIGHBITDEPTH
  if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) {
    x->source_variance =
        vp9_high_get_sby_perpixel_variance(cpi, &x->plane[0].src,
                                           bsize, xd->bd);
  } else
#endif  // CONFIG_VP9_HIGHBITDEPTH
  {
    x->source_variance =
        vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);
  }

  // Save rdmult before it might be changed, so it can be restored later.
  saved_rdmult = x->rdmult;

  switch (aq_mode) {
    case VARIANCE_AQ: {
      const int energy = bsize <= BLOCK_16X16
                             ? x->mb_energy
                             : vp9_block_energy(cpi, x, bsize);
      if (cm->frame_type == KEY_FRAME ||
          cpi->refresh_alt_ref_frame ||
          (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) {
        mbmi->segment_id = vp9_vaq_segment_id(energy);
      } else {
        const uint8_t *const map = cm->seg.update_map
                                       ? cpi->segmentation_map
                                       : cm->last_frame_seg_map;
        mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
      }
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
      break;
    }
    case COMPLEXITY_AQ:
      x->rdmult = set_segment_rdmult(cpi, x, mbmi->segment_id);
      break;
    case CYCLIC_REFRESH_AQ: {
      const uint8_t *const map = cm->seg.update_map
                                     ? cpi->segmentation_map
                                     : cm->last_frame_seg_map;
      const int map_segment_id =
          get_segment_id(cm, map, bsize, mi_row, mi_col);
      // If segment is boosted, use rdmult for that segment.
      if (cyclic_refresh_segment_id_boosted(map_segment_id))
        x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
      break;
    }
    default:
      break;
  }

  // Find best coding mode & reconstruct the MB so it is available
  // as a predictor for MBs that follow in the SB
  if (frame_is_intra_only(cm)) {
    vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd);
  } else if (bsize < BLOCK_8X8) {
    vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col,
                                  rd_cost, bsize, ctx, best_rd);
  } else if (segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP)) {
    vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize,
                                       ctx, best_rd);
  } else {
    vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col,
                              rd_cost, bsize, ctx, best_rd);
  }

  // Examine the resulting rate and for AQ mode 2 make a segment choice.
  if (rd_cost->rate != INT_MAX &&
      aq_mode == COMPLEXITY_AQ &&
      bsize >= BLOCK_16X16 &&
      (cm->frame_type == KEY_FRAME ||
       cpi->refresh_alt_ref_frame ||
       (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) {
    vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate);
  }

  x->rdmult = saved_rdmult;

  // TODO(jingning) The rate-distortion optimization flow needs to be
  // refactored to provide proper exit/return handle.
  if (rd_cost->rate == INT_MAX)
    rd_cost->rdcost = INT64_MAX;

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
1280 
// Updates the per-thread frame counts (td->counts) for the block currently
// addressed by td->mb: intra/inter, reference-mode and reference-frame
// counts, and inter prediction mode counts. Does nothing on intra-only
// frames.
static void update_stats(VP9_COMMON *cm, ThreadData *td) {
  const MACROBLOCK *x = &td->mb;
  const MACROBLOCKD *const xd = &x->e_mbd;
  const MODE_INFO *const mi = xd->mi[0];
  const MB_MODE_INFO *const mbmi = &mi->mbmi;
  const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext;
  const BLOCK_SIZE bsize = mbmi->sb_type;
  FRAME_COUNTS *counts;
  int is_inter;

  if (frame_is_intra_only(cm))
    return;

  counts = td->counts;
  is_inter = is_inter_block(mbmi);

  if (!segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_REF_FRAME)) {
    counts->intra_inter[vp9_get_intra_inter_context(xd)][is_inter]++;
    // If the segment reference feature is enabled we have only a single
    // reference frame allowed for the segment so exclude it from
    // the reference frame counts used to work out probabilities.
    if (is_inter) {
      const MV_REFERENCE_FRAME ref0 = mbmi->ref_frame[0];
      const int is_compound = has_second_ref(mbmi);

      if (cm->reference_mode == REFERENCE_MODE_SELECT)
        counts->comp_inter[vp9_get_reference_mode_context(cm, xd)]
                          [is_compound]++;

      if (is_compound) {
        counts->comp_ref[vp9_get_pred_context_comp_ref_p(cm, xd)]
                        [ref0 == GOLDEN_FRAME]++;
      } else {
        // First decision: LAST vs. not LAST; second (only when not LAST):
        // GOLDEN vs. not GOLDEN.
        counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0]
                          [ref0 != LAST_FRAME]++;
        if (ref0 != LAST_FRAME)
          counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1]
                            [ref0 != GOLDEN_FRAME]++;
      }
    }
  }

  // Inter mode counts apply only to inter blocks without the segment skip
  // feature.
  if (!is_inter ||
      segfeature_active(&cm->seg, mbmi->segment_id, SEG_LVL_SKIP))
    return;

  {
    const int mode_ctx = mbmi_ext->mode_context[mbmi->ref_frame[0]];

    if (bsize >= BLOCK_8X8) {
      const PREDICTION_MODE mode = mbmi->mode;
      ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)];
    } else {
      // Sub-8x8: count one mode per sub-block, stepping by the sub-block
      // size in 4x4 units.
      const int step_w = num_4x4_blocks_wide_lookup[bsize];
      const int step_h = num_4x4_blocks_high_lookup[bsize];
      int row4, col4;
      for (row4 = 0; row4 < 2; row4 += step_h) {
        for (col4 = 0; col4 < 2; col4 += step_w) {
          const int blk = row4 * 2 + col4;
          const PREDICTION_MODE b_mode = mi->bmi[blk].as_mode;
          ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)];
        }
      }
    }
  }
}
1338 
// Copies previously saved entropy contexts (|a| above, |l| left) and
// partition contexts (|sa| above, |sl| left) back into |x|'s macroblockd for
// the block of size |bsize| at (mi_row, mi_col). Counterpart of
// save_context(): plane p's data is read from offset
// num_4x4_blocks_{wide,high} * p in |a| / |l|.
static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col,
                            ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                            ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                            PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                            BLOCK_SIZE bsize) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const int w4 = num_4x4_blocks_wide_lookup[bsize];
  const int h4 = num_4x4_blocks_high_lookup[bsize];
  const int w8 = num_8x8_blocks_wide_lookup[bsize];
  const int h8 = num_8x8_blocks_high_lookup[bsize];
  // Left contexts are indexed by the masked row (mi_row & MI_MASK).
  const int masked_row = mi_row & MI_MASK;
  int plane;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    ENTROPY_CONTEXT *const above_dst =
        xd->above_context[plane] + ((mi_col * 2) >> ss_x);
    ENTROPY_CONTEXT *const left_dst =
        xd->left_context[plane] + ((masked_row * 2) >> ss_y);

    memcpy(above_dst, a + w4 * plane,
           (sizeof(ENTROPY_CONTEXT) * w4) >> ss_x);
    memcpy(left_dst, l + h4 * plane,
           (sizeof(ENTROPY_CONTEXT) * h4) >> ss_y);
  }

  memcpy(xd->above_seg_context + mi_col, sa,
         sizeof(*xd->above_seg_context) * w8);
  memcpy(xd->left_seg_context + masked_row, sl,
         sizeof(xd->left_seg_context[0]) * h8);
}
1368 
// Buffers the above/left entropy contexts (into |a| / |l|) and the above/left
// partition contexts (into |sa| / |sl|) of the block of size |bsize| at
// (mi_row, mi_col), so that restore_context() can put them back later.
// Plane p's data is stored at offset num_4x4_blocks_{wide,high} * p in
// |a| / |l|.
static void save_context(MACROBLOCK *const x, int mi_row, int mi_col,
                         ENTROPY_CONTEXT a[16 * MAX_MB_PLANE],
                         ENTROPY_CONTEXT l[16 * MAX_MB_PLANE],
                         PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8],
                         BLOCK_SIZE bsize) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const int w4 = num_4x4_blocks_wide_lookup[bsize];
  const int h4 = num_4x4_blocks_high_lookup[bsize];
  const int w8 = num_8x8_blocks_wide_lookup[bsize];
  const int h8 = num_8x8_blocks_high_lookup[bsize];
  // Left contexts are indexed by the masked row (mi_row & MI_MASK).
  const int masked_row = mi_row & MI_MASK;
  int plane;

  for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
    const int ss_x = xd->plane[plane].subsampling_x;
    const int ss_y = xd->plane[plane].subsampling_y;
    const ENTROPY_CONTEXT *const above_src =
        xd->above_context[plane] + ((mi_col * 2) >> ss_x);
    const ENTROPY_CONTEXT *const left_src =
        xd->left_context[plane] + ((masked_row * 2) >> ss_y);

    memcpy(a + w4 * plane, above_src,
           (sizeof(ENTROPY_CONTEXT) * w4) >> ss_x);
    memcpy(l + h4 * plane, left_src,
           (sizeof(ENTROPY_CONTEXT) * h4) >> ss_y);
  }

  memcpy(sa, xd->above_seg_context + mi_col,
         sizeof(*xd->above_seg_context) * w8);
  memcpy(sl, xd->left_seg_context + masked_row,
         sizeof(xd->left_seg_context[0]) * h8);
}
1400 
// Encodes a single block: positions the macroblock at (mi_row, mi_col),
// commits the mode decision from |ctx| via update_state(), then runs
// encode_superblock(). When |output_enabled| is non-zero it also updates the
// frame counts and appends an EOSB_TOKEN to the token stream at *tp.
static void encode_b(VP9_COMP *cpi, const TileInfo *const tile,
                     ThreadData *td,
                     TOKENEXTRA **tp, int mi_row, int mi_col,
                     int output_enabled, BLOCK_SIZE bsize,
                     PICK_MODE_CONTEXT *ctx) {
  TOKENEXTRA *end_marker;

  set_offsets(cpi, tile, &td->mb, mi_row, mi_col, bsize);
  update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled);
  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);

  if (!output_enabled)
    return;

  update_stats(&cpi->common, td);

  // Claim the next token slot and advance the stream past it.
  end_marker = (*tp)++;
  end_marker->token = EOSB_TOKEN;
}
1418 
// Recursively encodes the partition tree |pc_tree| for the block of size
// |bsize| at (mi_row, mi_col). Leaf partitions are handed to encode_b();
// PARTITION_SPLIT above 8x8 recurses into the four quadrants. Positions at or
// beyond the frame's mi_rows / mi_cols are ignored. Partition counts are
// updated only when |output_enabled| is non-zero.
static void encode_sb(VP9_COMP *cpi, ThreadData *td,
                      const TileInfo *const tile,
                      TOKENEXTRA **tp, int mi_row, int mi_col,
                      int output_enabled, BLOCK_SIZE bsize,
                      PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &td->mb.e_mbd;
  const int bsl = b_width_log2_lookup[bsize];
  // Half the block size, in mi units.
  const int hbs = (1 << bsl) / 4;
  int ctx = 0;
  BLOCK_SIZE subsize = BLOCK_4X4;
  PARTITION_TYPE partition;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  // Below 8x8 the defaults above (ctx 0, BLOCK_4X4) are kept.
  if (bsize >= BLOCK_8X8) {
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = get_subsize(bsize, pc_tree->partitioning);
  }

  partition = partition_lookup[bsl][subsize];
  if (output_enabled && bsize != BLOCK_4X4)
    td->counts->partition[ctx][partition]++;

  switch (partition) {
    case PARTITION_NONE:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->none);
      break;
    case PARTITION_VERT:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->vertical[0]);
      // The right half is coded only if it starts inside the frame.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled,
                 subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
               &pc_tree->horizontal[0]);
      // The bottom half is coded only if it starts inside the frame.
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled,
                 subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize,
                 pc_tree->leaf_split[0]);
      } else {
        // Quadrants in raster order: bit 0 selects the column half, bit 1
        // the row half.
        int quad;
        for (quad = 0; quad < 4; ++quad) {
          encode_sb(cpi, td, tile, tp,
                    mi_row + (quad >> 1) * hbs, mi_col + (quad & 1) * hbs,
                    output_enabled, subsize, pc_tree->split[quad]);
        }
      }
      break;
    default:
      assert(0 && "Invalid partition type.");
      break;
  }

  // For splits above 8x8 this call is skipped at this level; the recursive
  // calls make it for their own blocks.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
1492 
1493 // Check to see if the given partition size is allowed for a specified number
1494 // of 8x8 block rows and columns remaining in the image.
1495 // If not then return the largest allowed partition size
find_partition_size(BLOCK_SIZE bsize,int rows_left,int cols_left,int * bh,int * bw)1496 static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize,
1497                                       int rows_left, int cols_left,
1498                                       int *bh, int *bw) {
1499   if (rows_left <= 0 || cols_left <= 0) {
1500     return VPXMIN(bsize, BLOCK_8X8);
1501   } else {
1502     for (; bsize > 0; bsize -= 3) {
1503       *bh = num_8x8_blocks_high_lookup[bsize];
1504       *bw = num_8x8_blocks_wide_lookup[bsize];
1505       if ((*bh <= rows_left) && (*bw <= cols_left)) {
1506         break;
1507       }
1508     }
1509   }
1510   return bsize;
1511 }
1512 
// Fills the mode-info grid of an SB64 that is only partly inside the tile.
//
// mi points at the superblock's top-left MODE_INFO and mis is the grid stride.
// The superblock is walked in steps that start at bh_in x bw_in; at every
// visited position the grid pointer is set and sb_type is chosen by
// find_partition_size() from bsize and the rows/columns still remaining.
// find_partition_size() may also update the step sizes as it goes.
static void set_partial_b64x64_partition(MODE_INFO *mi, int mis,
    int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining,
    BLOCK_SIZE bsize, MODE_INFO **mi_8x8) {
  int row_step = bh_in;
  int row = 0;

  while (row < MI_BLOCK_SIZE) {
    // The column step restarts from the requested width on every row.
    int col_step = bw_in;
    int col = 0;

    while (col < MI_BLOCK_SIZE) {
      const int offset = row * mis + col;
      mi_8x8[offset] = mi + offset;
      mi_8x8[offset]->mbmi.sb_type =
          find_partition_size(bsize, row8x8_remaining - row,
                              col8x8_remaining - col, &row_step, &col_step);
      col += col_step;
    }
    row += row_step;
  }
}
1528 
1529 // This function attempts to set all mode info entries in a given SB64
1530 // to the same block partition size.
1531 // However, at the bottom and right borders of the image the requested size
1532 // may not be allowed in which case this code attempts to choose the largest
1533 // allowable partition.
set_fixed_partitioning(VP9_COMP * cpi,const TileInfo * const tile,MODE_INFO ** mi_8x8,int mi_row,int mi_col,BLOCK_SIZE bsize)1534 static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile,
1535                                    MODE_INFO **mi_8x8, int mi_row, int mi_col,
1536                                    BLOCK_SIZE bsize) {
1537   VP9_COMMON *const cm = &cpi->common;
1538   const int mis = cm->mi_stride;
1539   const int row8x8_remaining = tile->mi_row_end - mi_row;
1540   const int col8x8_remaining = tile->mi_col_end - mi_col;
1541   int block_row, block_col;
1542   MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col;
1543   int bh = num_8x8_blocks_high_lookup[bsize];
1544   int bw = num_8x8_blocks_wide_lookup[bsize];
1545 
1546   assert((row8x8_remaining > 0) && (col8x8_remaining > 0));
1547 
1548   // Apply the requested partition size to the SB64 if it is all "in image"
1549   if ((col8x8_remaining >= MI_BLOCK_SIZE) &&
1550       (row8x8_remaining >= MI_BLOCK_SIZE)) {
1551     for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) {
1552       for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) {
1553         int index = block_row * mis + block_col;
1554         mi_8x8[index] = mi_upper_left + index;
1555         mi_8x8[index]->mbmi.sb_type = bsize;
1556       }
1557     }
1558   } else {
1559     // Else this is a partial SB64.
1560     set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining,
1561         col8x8_remaining, bsize, mi_8x8);
1562   }
1563 }
1564 
// Row/column offsets, in mode-info units from the top-left of an SB64, of its
// sixteen 16x16 blocks.  Entries come in groups of four, one group per 32x32
// quadrant, so entry [q * 4] is the top-left 16x16 block of quadrant q.
static const struct {
  int row;
  int col;
} coord_lookup[16] = {
  // 32x32 index = 0 (rows 0-2, cols 0-2)
  {0, 0}, {0, 2},
  {2, 0}, {2, 2},
  // 32x32 index = 1 (rows 0-2, cols 4-6)
  {0, 4}, {0, 6},
  {2, 4}, {2, 6},
  // 32x32 index = 2 (rows 4-6, cols 0-2)
  {4, 0}, {4, 2},
  {6, 0}, {6, 2},
  // 32x32 index = 3 (rows 4-6, cols 4-6)
  {4, 4}, {4, 6},
  {6, 4}, {6, 6},
};
1578 
// Chooses a 16x16 / 32x32 / 64x64 partitioning for the SB64 at
// (mi_row, mi_col) from the per-16x16 statistics in cpi->source_diff_var.
//
// For an SB64 fully inside the tile, every 16x16 block starts out as
// BLOCK_16X16.  A 32x32 quadrant is promoted to BLOCK_32X32 when all four of
// its 16x16 variances are below cpi->source_var_thresh.  If all four quadrants
// were promoted and each combined 32x32 variance is below twice that
// threshold, the superblock becomes a single BLOCK_64X64.  A partial SB64 is
// handled by set_partial_b64x64_partition() with a 16x16 target size.
static void set_source_var_based_partition(VP9_COMP *cpi,
                                           const TileInfo *const tile,
                                           MACROBLOCK *const x,
                                           MODE_INFO **mi_8x8,
                                           int mi_row, int mi_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int mis = cm->mi_stride;
  const int row8x8_remaining = tile->mi_row_end - mi_row;
  const int col8x8_remaining = tile->mi_col_end - mi_col;
  MODE_INFO *const sb_origin = cm->mi + mi_row * mis + mi_col;

  vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col);

  assert((row8x8_remaining > 0) && (col8x8_remaining > 0));

  if ((col8x8_remaining < MI_BLOCK_SIZE) ||
      (row8x8_remaining < MI_BLOCK_SIZE)) {
    // Partial in-image SB64.
    set_partial_b64x64_partition(sb_origin, mis,
        num_8x8_blocks_high_lookup[BLOCK_16X16],
        num_8x8_blocks_wide_lookup[BLOCK_16X16],
        row8x8_remaining, col8x8_remaining, BLOCK_16X16, mi_8x8);
  } else {
    // In-image SB64.
    // Index of this superblock's first entry in cpi->source_diff_var, which
    // is addressed with a stride of cm->mb_cols.
    const int sb_offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1);
    unsigned int thr = cpi->source_var_thresh;
    diff d32[4];
    int flat_quads = 0;
    int quad, k;

    memset(d32, 0, sizeof(d32));

    for (quad = 0; quad < 4; ++quad) {
      diff *d16[4];
      int all_below = 1;

      for (k = 0; k < 4; ++k) {
        const int b_mi_row = coord_lookup[quad * 4 + k].row;
        const int b_mi_col = coord_lookup[quad * 4 + k].col;
        const int b_offset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2;
        const int grid_idx = b_mi_row * mis + b_mi_col;

        d16[k] = cpi->source_diff_var + sb_offset + b_offset;

        mi_8x8[grid_idx] = sb_origin + grid_idx;
        mi_8x8[grid_idx]->mbmi.sb_type = BLOCK_16X16;

        // TODO(yunqingwang): If d16[k].var is very large, use 8x8 partition
        // size to further improve quality.
      }

      for (k = 0; k < 4 && all_below; ++k)
        all_below = d16[k]->var < thr;

      // Use 32x32 partition.
      if (all_below) {
        diff *const q = &d32[quad];
        const int grid_idx =
            coord_lookup[quad * 4].row * mis + coord_lookup[quad * 4].col;

        ++flat_quads;

        for (k = 0; k < 4; ++k) {
          q->sse += d16[k]->sse;
          q->sum += d16[k]->sum;
        }

        q->var = q->sse - (((int64_t)q->sum * q->sum) >> 10);

        mi_8x8[grid_idx] = sb_origin + grid_idx;
        mi_8x8[grid_idx]->mbmi.sb_type = BLOCK_32X32;
      }
    }

    if (flat_quads == 4) {
      int all_below = 1;

      // The 64x64 decision uses twice the 16x16 threshold.
      thr <<= 1;
      for (quad = 0; quad < 4 && all_below; ++quad)
        all_below = d32[quad].var < thr;

      // Use 64x64 partition.
      if (all_below) {
        mi_8x8[0] = sb_origin;
        mi_8x8[0]->mbmi.sb_type = BLOCK_64X64;
      }
    }
  }
}
1664 
// Commits the mode decision held in ctx to the encoder state for the block at
// (mi_row, mi_col).
//
// Copies ctx->mic and ctx->mbmi_ext into the current mode-info slots, sets the
// segment id (or updates cyclic refresh) when segmentation-based AQ is active,
// updates motion-vector and interpolation-filter counts for inter blocks,
// writes the block's reference frames and motion vectors into the current
// frame's MV buffer when cm->use_prev_frame_mvs is set, and finally restores
// the skip flags from ctx.
static void update_state_rt(VP9_COMP *cpi, ThreadData *td,
                            PICK_MODE_CONTEXT *ctx,
                            int mi_row, int mi_col, int bsize) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO *const mi = xd->mi[0];
  MB_MODE_INFO *const mbmi = &mi->mbmi;
  const struct segmentation *const seg = &cm->seg;
  // Block footprint in 8x8 units, clipped to the frame.  The size is read from
  // the grid entry before ctx->mic is copied over it below.
  const int bw = num_8x8_blocks_wide_lookup[mbmi->sb_type];
  const int bh = num_8x8_blocks_high_lookup[mbmi->sb_type];
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);

  *mi = ctx->mic;
  *(x->mbmi_ext) = ctx->mbmi_ext;

  if (seg->enabled && cpi->oxcf.aq_mode) {
    if (cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
        cpi->oxcf.aq_mode != VARIANCE_AQ) {
      // Setting segmentation map for cyclic_refresh.
      vp9_cyclic_refresh_update_segment(cpi, mbmi, mi_row, mi_col, bsize,
                                        ctx->rate, ctx->dist, x->skip);
    } else {
      // For in frame complexity AQ or variance AQ, copy segment_id from
      // segmentation_map.
      const uint8_t *map;
      if (seg->update_map)
        map = cpi->segmentation_map;
      else
        map = cm->last_frame_seg_map;
      mbmi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col);
    }
    vp9_init_plane_quantizers(cpi, x);
  }

  if (is_inter_block(mbmi)) {
    vp9_update_mv_count(td);

    if (cm->interp_filter == SWITCHABLE) {
      const int pred_ctx = vp9_get_pred_context_switchable_interp(xd);
      ++td->counts->switchable_interp[pred_ctx][mbmi->interp_filter];
    }

    // Below 8x8 the block-level motion vectors are taken from sub-block 3.
    if (mbmi->sb_type < BLOCK_8X8) {
      int ref;
      for (ref = 0; ref < 2; ++ref)
        mbmi->mv[ref].as_int = mi->bmi[3].as_mv[ref].as_int;
    }
  }

  if (cm->use_prev_frame_mvs) {
    MV_REF *const first_mv =
        cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col;
    int r, c;

    for (r = 0; r < y_mis; ++r) {
      MV_REF *const row_mv = first_mv + r * cm->mi_cols;
      for (c = 0; c < x_mis; ++c) {
        MV_REF *const dst = row_mv + c;
        dst->ref_frame[0] = mi->mbmi.ref_frame[0];
        dst->ref_frame[1] = mi->mbmi.ref_frame[1];
        dst->mv[0].as_int = mi->mbmi.mv[0].as_int;
        dst->mv[1].as_int = mi->mbmi.mv[1].as_int;
      }
    }
  }

  x->skip = ctx->skip;
  // The stored transform-skip flag is honoured only for segment 0.
  if (mbmi->segment_id)
    x->skip_txfm[0] = 0;
  else
    x->skip_txfm[0] = ctx->skip_txfm[0];
}
1731 
// Encodes one leaf block on the real-time path.
//
// Sets up the block offsets, commits the mode decision in ctx through
// update_state_rt(), optionally runs the temporal denoiser, encodes the block
// with encode_superblock(), updates the statistics and terminates the block's
// token run with an EOSB_TOKEN entry, advancing *tp past it.
static void encode_b_rt(VP9_COMP *cpi, ThreadData *td,
                        const TileInfo *const tile,
                        TOKENEXTRA **tp, int mi_row, int mi_col,
                        int output_enabled, BLOCK_SIZE bsize,
                        PICK_MODE_CONTEXT *ctx) {
  MACROBLOCK *const x = &td->mb;
  TOKENEXTRA *eosb;

  set_offsets(cpi, tile, x, mi_row, mi_col, bsize);
  update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize);

#if CONFIG_VP9_TEMPORAL_DENOISING
  // Denoise only for real output on non-key frames with no resize pending.
  if (output_enabled && cpi->oxcf.noise_sensitivity > 0) {
    if (cpi->common.frame_type != KEY_FRAME && cpi->resize_pending == 0) {
      vp9_denoiser_denoise(&cpi->denoiser, x, mi_row, mi_col,
                           VPXMAX(BLOCK_8X8, bsize), ctx);
    }
  }
#endif

  encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx);
  update_stats(&cpi->common, td);

  eosb = *tp;
  eosb->token = EOSB_TOKEN;
  *tp = eosb + 1;
}
1757 
// Real-time counterpart of encode_sb(): encodes the bsize region at
// (mi_row, mi_col) following the block sizes stored in the visible mode-info
// grid rather than pc_tree->partitioning.
//
// For bsize >= BLOCK_8X8 the sub-block size is the sb_type of the grid entry
// at the region's origin; smaller sizes use BLOCK_4X4 and partition context 0.
// Leaf blocks are handed to encode_b_rt(); PARTITION_SPLIT recurses into the
// four quadrants.  A region whose origin lies outside the frame is skipped.
// When output_enabled is set (and bsize is not BLOCK_4X4) the partition type
// is tallied in td->counts.
static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td,
                         const TileInfo *const tile,
                         TOKENEXTRA **tp, int mi_row, int mi_col,
                         int output_enabled, BLOCK_SIZE bsize,
                         PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &td->mb.e_mbd;
  const int bsl = b_width_log2_lookup[bsize];
  // Distance from the block origin to its second half, in mode-info units.
  const int half = (1 << bsl) / 4;
  int ctx = 0;
  BLOCK_SIZE subsize = BLOCK_4X4;
  PARTITION_TYPE partition;
  int quad;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  if (bsize >= BLOCK_8X8) {
    MODE_INFO **const grid_pos =
        cm->mi_grid_visible + (xd->mi_stride * mi_row + mi_col);
    ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    subsize = grid_pos[0]->mbmi.sb_type;
  }

  partition = partition_lookup[bsl][subsize];
  if (bsize != BLOCK_4X4 && output_enabled)
    td->counts->partition[ctx][partition]++;

  if (partition == PARTITION_NONE) {
    encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                &pc_tree->none);
  } else if (partition == PARTITION_VERT) {
    encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                &pc_tree->vertical[0]);
    // The right half is coded only above 8x8 and when it starts in-frame.
    if (bsize > BLOCK_8X8 && mi_col + half < cm->mi_cols) {
      encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + half, output_enabled,
                  subsize, &pc_tree->vertical[1]);
    }
  } else if (partition == PARTITION_HORZ) {
    encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize,
                &pc_tree->horizontal[0]);
    // The bottom half is coded only above 8x8 and when it starts in-frame.
    if (bsize > BLOCK_8X8 && mi_row + half < cm->mi_rows) {
      encode_b_rt(cpi, td, tile, tp, mi_row + half, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[1]);
    }
  } else if (partition == PARTITION_SPLIT) {
    // From here on subsize is the quadrant size; the context update below
    // sees this value when bsize == BLOCK_8X8.
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    // Quadrant order: top-left, top-right, bottom-left, bottom-right.
    for (quad = 0; quad < 4; ++quad) {
      encode_sb_rt(cpi, td, tile, tp,
                   mi_row + (quad >> 1) * half, mi_col + (quad & 1) * half,
                   output_enabled, subsize, pc_tree->split[quad]);
    }
  } else {
    assert(0 && "Invalid partition type.");
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
1829 
// Costs the partitioning already stored in the mode-info grid for the bsize
// block at (mi_row, mi_col) and, when the speed features request it, compares
// it with two alternatives.
//
//   1. The stored partitioning (read from mi_8x8[0]->mbmi.sb_type) is always
//      evaluated; PARTITION_SPLIT above 8x8 recurses into the quadrants.
//   2. With sf.partition_search_type == SEARCH_PARTITION and
//      sf.adjust_partitioning_from_last_frame set, PARTITION_NONE is also
//      tried unless the stored partitioning is already NONE, or every quadrant
//      of a stored split is itself split further, or the block's centre lies
//      outside the frame.
//   3. Under the same speed features, a stored non-split partitioning of a
//      block larger than 8x8 is additionally compared with a one-level split
//      whose quadrants are each coded with PARTITION_NONE.
//
// The candidate with the lowest rdcost is recorded in pc_tree->partitioning
// and its rate/distortion are returned through *rate and *dist.  If do_recon
// is non-zero the block is then encoded with encode_sb(); output is enabled
// only for a 64x64 block.  Nothing is written when the block origin is outside
// the frame.
static void rd_use_partition(VP9_COMP *cpi,
                             ThreadData *td,
                             TileDataEnc *tile_data,
                             MODE_INFO **mi_8x8, TOKENEXTRA **tp,
                             int mi_row, int mi_col,
                             BLOCK_SIZE bsize,
                             int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  // Offset of the second half / the lower-right quadrants, in mi units.
  const int half_step = mi_step >> 1;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  // Entropy and partition contexts as they were on entry to this block.
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
  // NOTE(review): this grid entry is read before the frame-bounds guard below;
  // the recursive call sites in this function only recurse for in-frame
  // quadrants -- confirm the same holds for external callers.
  const BLOCK_SIZE bs_type = mi_8x8[0]->mbmi.sb_type;
  PICK_MODE_CONTEXT *const ctx = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  assert(num_4x4_blocks_wide_lookup[bsize] ==
         num_4x4_blocks_high_lookup[bsize]);

  // Presumably marks each cost as "not yet valid" (the code below tests
  // against INT_MAX / INT64_MAX) -- see vp9_rd_cost_reset().
  vp9_rd_cost_reset(&last_part_rdc);
  vp9_rd_cost_reset(&none_rdc);
  vp9_rd_cost_reset(&chosen_rdc);

  partition = partition_lookup[bsl][bs_type];
  subsize = get_subsize(bsize, partition);

  pc_tree->partitioning = partition;
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  if (cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    int splits_below = 0;

    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      const BLOCK_SIZE sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        const MODE_INFO *const quad_mi =
            mi_8x8[(i >> 1) * bss * mis + (i & 0x01) * bss];
        if (quad_mi && quad_mi->mbmi.sb_type >= sub_subsize)
          splits_below = 0;
      }
    }

    // If partition is not none try none unless each of the 4 splits are split
    // even further.
    if (partition != PARTITION_NONE && !splits_below &&
        mi_row + half_step < cm->mi_rows &&
        mi_col + half_step < cm->mi_cols) {
      pc_tree->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize,
                       ctx, INT64_MAX);

      pl = partition_plane_context(xd, mi_row, mi_col, bsize);

      if (none_rdc.rate < INT_MAX) {
        none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate,
                                 none_rdc.dist);
      }

      // Put back the state the NONE trial disturbed.
      restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
      mi_8x8[0]->mbmi.sb_type = bs_type;
      pc_tree->partitioning = partition;
    }
  }

  // Cost the stored partitioning.
  switch (partition) {
    case PARTITION_NONE:
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       bsize, ctx, INT64_MAX);
      break;
    case PARTITION_HORZ: {
      PICK_MODE_CONTEXT *const top_ctx = &pc_tree->horizontal[0];
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, top_ctx, INT64_MAX);
      if (last_part_rdc.rate != INT_MAX &&
          bsize >= BLOCK_8X8 && mi_row + half_step < cm->mi_rows) {
        RD_COST tmp_rdc;
        vp9_rd_cost_init(&tmp_rdc);
        // Commit and encode the top half before the bottom half is searched.
        update_state(cpi, td, top_ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, top_ctx);
        rd_pick_sb_modes(cpi, tile_data, x,
                         mi_row + half_step, mi_col, &tmp_rdc,
                         subsize, &pc_tree->horizontal[1], INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
        } else {
          last_part_rdc.rate += tmp_rdc.rate;
          last_part_rdc.dist += tmp_rdc.dist;
          last_part_rdc.rdcost += tmp_rdc.rdcost;
        }
      }
      break;
    }
    case PARTITION_VERT: {
      PICK_MODE_CONTEXT *const left_ctx = &pc_tree->vertical[0];
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                       subsize, left_ctx, INT64_MAX);
      if (last_part_rdc.rate != INT_MAX &&
          bsize >= BLOCK_8X8 && mi_col + half_step < cm->mi_cols) {
        RD_COST tmp_rdc;
        vp9_rd_cost_init(&tmp_rdc);
        // Commit and encode the left half before the right half is searched.
        update_state(cpi, td, left_ctx, mi_row, mi_col, subsize, 0);
        encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, left_ctx);
        rd_pick_sb_modes(cpi, tile_data, x,
                         mi_row, mi_col + half_step, &tmp_rdc,
                         subsize, &pc_tree->vertical[bsize > BLOCK_8X8],
                         INT64_MAX);
        if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
          vp9_rd_cost_reset(&last_part_rdc);
        } else {
          last_part_rdc.rate += tmp_rdc.rate;
          last_part_rdc.dist += tmp_rdc.dist;
          last_part_rdc.rdcost += tmp_rdc.rdcost;
        }
      }
      break;
    }
    case PARTITION_SPLIT:
      if (bsize == BLOCK_8X8) {
        rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc,
                         subsize, pc_tree->leaf_split[0], INT64_MAX);
      } else {
        last_part_rdc.rate = 0;
        last_part_rdc.dist = 0;
        last_part_rdc.rdcost = 0;
        for (i = 0; i < 4; i++) {
          const int quad_row = i >> 1, quad_col = i & 0x01;
          const int x_idx = quad_col * half_step;
          const int y_idx = quad_row * half_step;
          RD_COST tmp_rdc;

          if ((mi_row + y_idx >= cm->mi_rows) ||
              (mi_col + x_idx >= cm->mi_cols))
            continue;

          vp9_rd_cost_init(&tmp_rdc);
          // Every quadrant but the last is reconstructed (do_recon = i != 3).
          rd_use_partition(cpi, td, tile_data,
                           mi_8x8 + quad_row * bss * mis + quad_col * bss, tp,
                           mi_row + y_idx, mi_col + x_idx, subsize,
                           &tmp_rdc.rate, &tmp_rdc.dist,
                           i != 3, pc_tree->split[i]);
          if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
            vp9_rd_cost_reset(&last_part_rdc);
            break;
          }
          last_part_rdc.rate += tmp_rdc.rate;
          last_part_rdc.dist += tmp_rdc.dist;
        }
      }
      break;
    default:
      assert(0);
      break;
  }

  pl = partition_plane_context(xd, mi_row, mi_col, bsize);
  if (last_part_rdc.rate < INT_MAX) {
    last_part_rdc.rate += cpi->partition_cost[pl][partition];
    last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                  last_part_rdc.rate, last_part_rdc.dist);
  }

  // Try a one-level split when the stored partitioning is not a split.
  if (cpi->sf.adjust_partitioning_from_last_frame &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      partition != PARTITION_SPLIT && bsize > BLOCK_8X8 &&
      (mi_row + mi_step < cm->mi_rows ||
       mi_row + half_step == cm->mi_rows) &&
      (mi_col + mi_step < cm->mi_cols ||
       mi_col + half_step == cm->mi_cols)) {
    const BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT);
    chosen_rdc.rate = 0;
    chosen_rdc.dist = 0;
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
    pc_tree->partitioning = PARTITION_SPLIT;

    // Split partition.
    for (i = 0; i < 4; i++) {
      const int x_idx = (i & 1) * half_step;
      const int y_idx = (i >> 1) * half_step;
      RD_COST tmp_rdc;
      // Per-quadrant snapshot, separate from the entry snapshot a/l/sa/sl.
      ENTROPY_CONTEXT quad_l[16 * MAX_MB_PLANE], quad_a[16 * MAX_MB_PLANE];
      PARTITION_CONTEXT quad_sl[8], quad_sa[8];

      if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols))
        continue;

      save_context(x, mi_row, mi_col, quad_a, quad_l, quad_sa, quad_sl, bsize);
      pc_tree->split[i]->partitioning = PARTITION_NONE;
      rd_pick_sb_modes(cpi, tile_data, x,
                       mi_row + y_idx, mi_col + x_idx, &tmp_rdc,
                       split_subsize, &pc_tree->split[i]->none, INT64_MAX);

      restore_context(x, mi_row, mi_col, quad_a, quad_l, quad_sa, quad_sl,
                      bsize);

      if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) {
        vp9_rd_cost_reset(&chosen_rdc);
        break;
      }

      chosen_rdc.rate += tmp_rdc.rate;
      chosen_rdc.dist += tmp_rdc.dist;

      if (i != 3)
        encode_sb(cpi, td, tile_info, tp,  mi_row + y_idx, mi_col + x_idx, 0,
                  split_subsize, pc_tree->split[i]);

      pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx,
                                   split_subsize);
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
    }
    pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    if (chosen_rdc.rate < INT_MAX) {
      chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                 chosen_rdc.rate, chosen_rdc.dist);
    }
  }

  // If last_part is better set the partitioning to that.
  if (last_part_rdc.rdcost < chosen_rdc.rdcost) {
    mi_8x8[0]->mbmi.sb_type = bsize;
    if (bsize >= BLOCK_8X8)
      pc_tree->partitioning = partition;
    chosen_rdc = last_part_rdc;
  }
  // If none was better set the partitioning to that.
  if (none_rdc.rdcost < chosen_rdc.rdcost) {
    if (bsize >= BLOCK_8X8)
      pc_tree->partitioning = PARTITION_NONE;
    chosen_rdc = none_rdc;
  }

  restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  // We must have chosen a partitioning and encoding or we'll fail later on.
  // No other opportunities for success.
  if (bsize == BLOCK_64X64)
    assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX);

  if (do_recon) {
    // Output (token and count generation) is enabled only at the SB64 level.
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, bsize == BLOCK_64X64,
              bsize, pc_tree);
  }

  *rate = chosen_rdc.rate;
  *dist = chosen_rdc.dist;
}
2099 
2100 static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = {
2101   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,
2102   BLOCK_4X4,   BLOCK_4X4,   BLOCK_4X4,
2103   BLOCK_8X8,   BLOCK_8X8,   BLOCK_8X8,
2104   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
2105   BLOCK_16X16
2106 };
2107 
2108 static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = {
2109   BLOCK_8X8,   BLOCK_16X16, BLOCK_16X16,
2110   BLOCK_16X16, BLOCK_32X32, BLOCK_32X32,
2111   BLOCK_32X32, BLOCK_64X64, BLOCK_64X64,
2112   BLOCK_64X64, BLOCK_64X64, BLOCK_64X64,
2113   BLOCK_64X64
2114 };
2115 
2116 
2117 // Look at all the mode_info entries for blocks that are part of this
2118 // partition and find the min and max values for sb_type.
2119 // At the moment this is designed to work on a 64x64 SB but could be
2120 // adjusted to use a size parameter.
2121 //
2122 // The min and max are assumed to have been initialized prior to calling this
2123 // function so repeat calls can accumulate a min and max of more than one sb64.
get_sb_partition_size_range(MACROBLOCKD * xd,MODE_INFO ** mi_8x8,BLOCK_SIZE * min_block_size,BLOCK_SIZE * max_block_size,int bs_hist[BLOCK_SIZES])2124 static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8,
2125                                         BLOCK_SIZE *min_block_size,
2126                                         BLOCK_SIZE *max_block_size,
2127                                         int bs_hist[BLOCK_SIZES]) {
2128   int sb_width_in_blocks = MI_BLOCK_SIZE;
2129   int sb_height_in_blocks  = MI_BLOCK_SIZE;
2130   int i, j;
2131   int index = 0;
2132 
2133   // Check the sb_type for each block that belongs to this region.
2134   for (i = 0; i < sb_height_in_blocks; ++i) {
2135     for (j = 0; j < sb_width_in_blocks; ++j) {
2136       MODE_INFO *mi = mi_8x8[index+j];
2137       BLOCK_SIZE sb_type = mi ? mi->mbmi.sb_type : 0;
2138       bs_hist[sb_type]++;
2139       *min_block_size = VPXMIN(*min_block_size, sb_type);
2140       *max_block_size = VPXMAX(*max_block_size, sb_type);
2141     }
2142     index += xd->mi_stride;
2143   }
2144 }
2145 
2146 // Next square block size less or equal than current block size.
2147 static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = {
2148   BLOCK_4X4, BLOCK_4X4, BLOCK_4X4,
2149   BLOCK_8X8, BLOCK_8X8, BLOCK_8X8,
2150   BLOCK_16X16, BLOCK_16X16, BLOCK_16X16,
2151   BLOCK_32X32, BLOCK_32X32, BLOCK_32X32,
2152   BLOCK_64X64
2153 };
2154 
2155 // Look at neighboring blocks and set a min and max partition size based on
2156 // what they chose.
// Derives the minimum and maximum partition sizes to search for the 64x64
// superblock at (mi_row, mi_col).
//
// The range is seeded from the partition sizes observed in the co-located
// area of the previous frame (non-key frames only) and in the left and above
// neighbors when those are present, then clamped against the tile boundary
// and the speed-feature limits.
//
//   cpi             encoder instance (speed features and common state).
//   tile            tile containing the superblock; its mi_row_end and
//                   mi_col_end bound the remaining area.
//   xd              macroblockd whose mi pointer and availability flags
//                   describe the current superblock.
//   mi_row, mi_col  superblock position.
//   min_block_size  output: smallest partition size to search.
//   max_block_size  output: largest partition size to search.
static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile,
                                    MACROBLOCKD *const xd,
                                    int mi_row, int mi_col,
                                    BLOCK_SIZE *min_block_size,
                                    BLOCK_SIZE *max_block_size) {
  VP9_COMMON *const cm = &cpi->common;
  MODE_INFO **const cur_mi = xd->mi;
  const int has_left = xd->left_available && cur_mi[-1];
  const int has_above = xd->up_available && cur_mi[-xd->mi_stride];
  const int rows_left = tile->mi_row_end - mi_row;
  const int cols_left = tile->mi_col_end - mi_col;
  const int is_inter_frame = (cm->frame_type != KEY_FRAME);
  int fit_bh, fit_bw;
  // With no prediction source at all, the full 4x4..64x64 range is kept.
  BLOCK_SIZE lo = BLOCK_4X4;
  BLOCK_SIZE hi = BLOCK_64X64;
  int bs_hist[BLOCK_SIZES] = {0};

  if (has_left || has_above || is_inter_frame) {
    // Start from an inverted range so that every observation below can only
    // widen it: each get_sb_partition_size_range() call continues from the
    // min/max left behind by the previous one.
    lo = BLOCK_64X64;
    hi = BLOCK_4X4;

    // Partition sizes used at this location in the previous frame.
    if (is_inter_frame) {
      MODE_INFO **const prev_mi =
          cm->prev_mi_grid_visible + (mi_row * xd->mi_stride + mi_col);
      get_sb_partition_size_range(xd, prev_mi, &lo, &hi, bs_hist);
    }

    // Partition sizes used in the SB64 to the left.
    if (has_left) {
      MODE_INFO **const left_sb64_mi = cur_mi - MI_BLOCK_SIZE;
      get_sb_partition_size_range(xd, left_sb64_mi, &lo, &hi, bs_hist);
    }

    // Partition sizes used in the SB64 above.
    if (has_above) {
      MODE_INFO **const above_sb64_mi =
          cur_mi - xd->mi_stride * MI_BLOCK_SIZE;
      get_sb_partition_size_range(xd, above_sb64_mi, &lo, &hi, bs_hist);
    }

    // The "relaxed" mode maps the observed bounds through the
    // min/max_partition_size tables.
    if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) {
      lo = min_partition_size[lo];
      hi = max_partition_size[hi];
    }
  }

  // The neighbor-derived maximum may not be legal at the tile border; fit it
  // to the rows/columns that remain.
  hi = find_partition_size(hi, rows_left, cols_left, &fit_bh, &fit_bw);

  // Superblocks on the edge of the active image (the real picture edge or
  // formatting bars) always allow the smallest size.
  if (vp9_active_edge_sb(cpi, mi_row, mi_col)) {
    lo = BLOCK_4X4;
  } else {
    const BLOCK_SIZE observed_lo = VPXMIN(lo, hi);
    lo = VPXMIN(cpi->sf.rd_auto_partition_min_limit, observed_lo);
  }

  // With square-only partitioning, lower the minimum to the next smaller
  // square size so that at least one square partition remains allowed.
  if (cpi->sf.use_square_partition_only && next_square_size[hi] < lo) {
    lo = next_square_size[hi];
  }

  *min_block_size = lo;
  *max_block_size = hi;
}
2232 
2233 // TODO(jingning) refactor functions setting partition search range
// Computes the range of block sizes found around the block at
// (mi_row, mi_col): in the co-located area of the previous frame's mode-info
// grid, in the column immediately to the left, and in the row immediately
// above. Grid entries that are NULL count as `bsize`.
//
// If only a single size was observed (min == max), the range is widened
// through the min_partition_size / max_partition_size tables.
//
//   cm              common state (mode-info stride, previous-frame grid).
//   xd              macroblockd positioned on the current block.
//   mi_row, mi_col  block position used to index the previous-frame grid.
//   bsize           current block size; sets the scanned area.
//   min_bs, max_bs  outputs: smallest / largest size observed.
static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd,
                                int mi_row, int mi_col, BLOCK_SIZE bsize,
                                BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) {
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  const int idx_str = cm->mi_stride * mi_row + mi_col;
  BLOCK_SIZE min_size = BLOCK_64X64;
  BLOCK_SIZE max_size = BLOCK_4X4;
  BLOCK_SIZE bs;
  MODE_INFO *mi;
  int idx, idy;

  // Test the grid base pointer itself. The address of a grid element is
  // non-NULL for any non-zero offset, so checking it would never skip the
  // scan, and forming it from a NULL base is undefined.
  if (cm->prev_mi_grid_visible) {
    MODE_INFO **const prev_mi = &cm->prev_mi_grid_visible[idx_str];

    for (idy = 0; idy < mi_height; ++idy) {
      for (idx = 0; idx < mi_width; ++idx) {
        mi = prev_mi[idy * cm->mi_stride + idx];
        bs = mi ? mi->mbmi.sb_type : bsize;
        min_size = VPXMIN(min_size, bs);
        max_size = VPXMAX(max_size, bs);
      }
    }
  }

  // Column immediately to the left of the block.
  if (xd->left_available) {
    for (idy = 0; idy < mi_height; ++idy) {
      mi = xd->mi[idy * cm->mi_stride - 1];
      bs = mi ? mi->mbmi.sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  // Row immediately above the block.
  if (xd->up_available) {
    for (idx = 0; idx < mi_width; ++idx) {
      mi = xd->mi[idx - cm->mi_stride];
      bs = mi ? mi->mbmi.sb_type : bsize;
      min_size = VPXMIN(min_size, bs);
      max_size = VPXMAX(max_size, bs);
    }
  }

  // A degenerate single-size range is relaxed via the lookup tables.
  if (min_size == max_size) {
    min_size = min_partition_size[min_size];
    max_size = max_partition_size[max_size];
  }

  *min_bs = min_size;
  *max_bs = max_size;
}
2286 
store_pred_mv(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx)2287 static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2288   memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv));
2289 }
2290 
load_pred_mv(MACROBLOCK * x,PICK_MODE_CONTEXT * ctx)2291 static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) {
2292   memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv));
2293 }
2294 
2295 #if CONFIG_FP_MB_STATS
2296 const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] =
2297   {1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4};
2298 const int num_16x16_blocks_high_lookup[BLOCK_SIZES] =
2299   {1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4};
2300 const int qindex_skip_threshold_lookup[BLOCK_SIZES] =
2301   {0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120};
2302 const int qindex_split_threshold_lookup[BLOCK_SIZES] =
2303   {0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120};
2304 const int complexity_16x16_blocks_threshold[BLOCK_SIZES] =
2305   {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6};
2306 
// Coarse motion direction decoded from a first-pass per-MB stats byte.
// The numeric values matter: get_motion_inconsistency() compares the
// absolute difference of two directions against 2.
typedef enum {
  MV_ZERO = 0,   // zero motion
  MV_LEFT = 1,
  MV_UP = 2,
  MV_RIGHT = 3,
  MV_DOWN = 4,
  MV_INVALID     // one past the last valid direction
} MOTION_DIRECTION;
2315 
get_motion_direction_fp(uint8_t fp_byte)2316 static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) {
2317   if (fp_byte & FPMB_MOTION_ZERO_MASK) {
2318     return MV_ZERO;
2319   } else if (fp_byte & FPMB_MOTION_LEFT_MASK) {
2320     return MV_LEFT;
2321   } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) {
2322     return MV_RIGHT;
2323   } else if (fp_byte & FPMB_MOTION_UP_MASK) {
2324     return MV_UP;
2325   } else {
2326     return MV_DOWN;
2327   }
2328 }
2329 
get_motion_inconsistency(MOTION_DIRECTION this_mv,MOTION_DIRECTION that_mv)2330 static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv,
2331                                            MOTION_DIRECTION that_mv) {
2332   if (this_mv == that_mv) {
2333     return 0;
2334   } else {
2335     return abs(this_mv - that_mv) == 2 ? 2 : 1;
2336   }
2337 }
2338 #endif
2339 
2340 // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are
2341 // unlikely to be selected depending on previous rate-distortion optimization
2342 // results, for encoding speed-up.
// Recursive rate-distortion partition search for the square block of size
// `bsize` at (mi_row, mi_col).
//
// Evaluates, in order, PARTITION_NONE, PARTITION_SPLIT (recursing into the
// four quadrants), PARTITION_HORZ and PARTITION_VERT, subject to the speed
// features, the min/max partition sizes and picture-edge constraints. The
// cheapest candidate is recorded in pc_tree->partitioning and its cost is
// written to *rd_cost.
//
//   cpi        encoder instance.
//   td         thread data; td->mb is the macroblock being worked on.
//   tile_data  tile state (tile_info is used for set_offsets/encode_sb).
//   tp         token pointer handed to encode_superblock()/encode_sb(); the
//              asserts at the end require it to advance only when
//              bsize == BLOCK_64X64.
//   mi_row, mi_col  block position.
//   bsize      block size; must be square (asserted).
//   rd_cost    output: best rate/distortion/rdcost found.
//   best_rd    initial upper bound on the rd cost; candidates must beat it.
//   pc_tree    partition tree node holding the per-partition mode contexts.
static void rd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                              TileDataEnc *tile_data,
                              TOKENEXTRA **tp, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, RD_COST *rd_cost,
                              int64_t best_rd, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i, pl;
  BLOCK_SIZE subsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;

  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + mi_step >= cm->mi_rows);
  const int force_vert_split = (mi_col + mi_step >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  BLOCK_SIZE min_size = x->min_partition_size;
  BLOCK_SIZE max_size = x->max_partition_size;

#if CONFIG_FP_MB_STATS
  unsigned int src_diff_var = UINT_MAX;
  int none_complexity = 0;
#endif

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed = !force_vert_split && yss <= xss &&
                               bsize >= BLOCK_8X8;
  int partition_vert_allowed = !force_horz_split && xss <= yss &&
                               bsize >= BLOCK_8X8;

  int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_dist_thr;
  int rate_breakout_thr = cpi->sf.partition_search_breakout_rate_thr;

  // tp_orig is only read by the asserts at the end of the function; keep
  // builds with assertions compiled out free of unused-variable warnings.
  (void)tp_orig;

  assert(num_8x8_blocks_wide_lookup[bsize] ==
             num_8x8_blocks_high_lookup[bsize]);

  // Adjust dist breakout threshold according to the partition size.
  dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
      b_height_log2_lookup[bsize]);
  rate_breakout_thr *= num_pels_log2_lookup[bsize];

  vp9_rd_cost_init(&this_rdc);
  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  best_rdc.rdcost = best_rd;

  set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode)
    x->mb_energy = vp9_block_energy(cpi, x, bsize);

  // Chessboard partition search: on alternating 16x16 blocks/frames, narrow
  // the search range to what the spatial/temporal neighbors used.
  if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) {
    int cb_partition_search_ctrl = ((pc_tree->index == 0 || pc_tree->index == 3)
        + get_chessboard_index(cm->current_video_frame)) & 0x1;

    if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size)
      set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size);
  }

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (cpi->sf.auto_min_max_partition_size) {
    partition_none_allowed &= (bsize <= max_size && bsize >= min_size);
    partition_horz_allowed &= ((bsize <= max_size && bsize > min_size) ||
                                force_horz_split);
    partition_vert_allowed &= ((bsize <= max_size && bsize > min_size) ||
                                force_vert_split);
    do_split &= bsize > min_size;
  }

  if (cpi->sf.use_square_partition_only &&
      bsize > cpi->sf.use_square_only_threshold) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  // Snapshot the entropy/partition contexts; they are restored after each
  // candidate partition type has been evaluated.
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

#if CONFIG_FP_MB_STATS
  if (cpi->use_fp_mb_stats) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src,
                                                  mi_row, mi_col, bsize);
  }

  // Decide whether we shall split directly and skip searching NONE by using
  // the first pass block statistics
  if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split &&
      partition_none_allowed && src_diff_var > 4 &&
      cm->base_qindex < qindex_split_threshold_lookup[bsize]) {
    int mb_row = mi_row >> 1;
    int mb_col = mi_col >> 1;
    int mb_row_end =
        VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
    int mb_col_end =
        VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
    int r, c;

    // compute a complexity measure, basically measure inconsistency of motion
    // vectors obtained from the first pass in the current block
    for (r = mb_row; r < mb_row_end ; r++) {
      for (c = mb_col; c < mb_col_end; c++) {
        const int mb_index = r * cm->mb_cols + c;

        MOTION_DIRECTION this_mv;
        MOTION_DIRECTION right_mv;
        MOTION_DIRECTION bottom_mv;

        this_mv =
            get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]);

        // to its right
        if (c != mb_col_end - 1) {
          right_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + 1]);
          none_complexity += get_motion_inconsistency(this_mv, right_mv);
        }

        // to its bottom
        if (r != mb_row_end - 1) {
          bottom_mv = get_motion_direction_fp(
              cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]);
          none_complexity += get_motion_inconsistency(this_mv, bottom_mv);
        }

        // do not count its left and top neighbors to avoid double counting
      }
    }

    if (none_complexity > complexity_16x16_blocks_threshold[bsize]) {
      partition_none_allowed = 0;
    }
  }
#endif

  // PARTITION_NONE
  if (partition_none_allowed) {
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col,
                     &this_rdc, bsize, ctx, best_rdc.rdcost);
    if (this_rdc.rate != INT_MAX) {
      if (bsize >= BLOCK_8X8) {
        pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
        this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                 this_rdc.rate, this_rdc.dist);
      }

      if (this_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8)
          pc_tree->partitioning = PARTITION_NONE;

        // If all y, u, v transform blocks in this partition are skippable, and
        // the dist & rate are within the thresholds, the partition search is
        // terminated for current branch of the partition search tree.
        if (!x->e_mbd.lossless && ctx->skippable  &&
            ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
             (best_rdc.dist < dist_breakout_thr &&
              best_rdc.rate < rate_breakout_thr))) {
          do_split = 0;
          do_rect = 0;
        }

#if CONFIG_FP_MB_STATS
        // Check if every 16x16 first pass block statistics has zero
        // motion and the corresponding first pass residue is small enough.
        // If that is the case, check the difference variance between the
        // current frame and the last frame. If the variance is small enough,
        // stop further splitting in RD optimization
        if (cpi->use_fp_mb_stats && do_split != 0 &&
            cm->base_qindex > qindex_skip_threshold_lookup[bsize]) {
          int mb_row = mi_row >> 1;
          int mb_col = mi_col >> 1;
          int mb_row_end =
              VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows);
          int mb_col_end =
              VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols);
          int r, c;

          int skip = 1;
          for (r = mb_row; r < mb_row_end; r++) {
            for (c = mb_col; c < mb_col_end; c++) {
              const int mb_index = r * cm->mb_cols + c;
              if (!(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_MOTION_ZERO_MASK) ||
                  !(cpi->twopass.this_frame_mb_stats[mb_index] &
                    FPMB_ERROR_SMALL_MASK)) {
                skip = 0;
                break;
              }
            }
            if (skip == 0) {
              break;
            }
          }
          if (skip) {
            if (src_diff_var == UINT_MAX) {
              set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
              src_diff_var = get_sby_perpixel_diff_variance(
                  cpi, &x->plane[0].src, mi_row, mi_col, bsize);
            }
            if (src_diff_var < 8) {
              do_split = 0;
              do_rect = 0;
            }
          }
        }
#endif
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // store estimated motion vector
  if (cpi->sf.adaptive_motion_search)
    store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  // TODO(jingning): use the motion vectors given by the above search as
  // the starting point of motion search in the following partition type check.
  if (do_split) {
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    if (bsize == BLOCK_8X8) {
      // Sub-8x8 split is handled as a single leaf; i = 4 marks the split as
      // fully evaluated for the check below.
      i = 4;
      if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed)
        pc_tree->leaf_split[0]->pred_interp_filter =
            ctx->mic.mbmi.interp_filter;
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                       pc_tree->leaf_split[0], best_rdc.rdcost);
      if (sum_rdc.rate == INT_MAX)
        sum_rdc.rdcost = INT64_MAX;
    } else {
      // Recurse into the four quadrants, stopping early once the running sum
      // can no longer beat the best cost so far.
      for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
        const int x_idx = (i & 1) * mi_step;
        const int y_idx = (i >> 1) * mi_step;

        if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
          continue;

        if (cpi->sf.adaptive_motion_search)
          load_pred_mv(x, ctx);

        pc_tree->split[i]->index = i;
        rd_pick_partition(cpi, td, tile_data, tp,
                          mi_row + y_idx, mi_col + x_idx,
                          subsize, &this_rdc,
                          best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

        if (this_rdc.rate == INT_MAX) {
          sum_rdc.rdcost = INT64_MAX;
          break;
        } else {
          sum_rdc.rate += this_rdc.rate;
          sum_rdc.dist += this_rdc.dist;
          sum_rdc.rdcost += this_rdc.rdcost;
        }
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost && i == 4) {
      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                              sum_rdc.rate, sum_rdc.dist);

      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_SPLIT;

        // Rate and distortion based partition search termination clause.
        if (!x->e_mbd.lossless &&
            ((best_rdc.dist < (dist_breakout_thr >> 2)) ||
             (best_rdc.dist < dist_breakout_thr &&
              best_rdc.rate < rate_breakout_thr))) {
          do_rect = 0;
        }
      }
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if ((cpi->sf.less_rectangular_check) &&
          ((bsize > cpi->sf.use_square_only_threshold) ||
           (best_rdc.dist < dist_breakout_thr)))
        do_rect &= !partition_none_allowed;
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_HORZ
  if (partition_horz_allowed &&
      (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    if (cpi->sf.adaptive_motion_search)
      load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->horizontal[0].pred_interp_filter =
          ctx->mic.mbmi.interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->horizontal[0], best_rdc.rdcost);

    // Evaluate the bottom half only if the top half alone is still
    // competitive and the bottom half lies inside the picture.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows &&
        bsize > BLOCK_8X8) {
      PICK_MODE_CONTEXT *const horz_ctx = &pc_tree->horizontal[0];
      update_state(cpi, td, horz_ctx, mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, horz_ctx);

      // NOTE(review): this reloads pred_mv from the top-half context, while
      // the PARTITION_VERT path below reloads from the PARTITION_NONE
      // context (`ctx`). Behavior is kept as-is -- confirm which is intended.
      if (cpi->sf.adaptive_motion_search)
        load_pred_mv(x, horz_ctx);
      rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col,
                       &this_rdc, subsize, &pc_tree->horizontal[1],
                       best_rdc.rdcost - sum_rdc.rdcost);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_HORZ;

        if ((cpi->sf.less_rectangular_check) &&
            (bsize > cpi->sf.use_square_only_threshold))
          do_rect = 0;
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // PARTITION_VERT
  if (partition_vert_allowed &&
      (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) {
    subsize = get_subsize(bsize, PARTITION_VERT);

    if (cpi->sf.adaptive_motion_search)
      load_pred_mv(x, ctx);
    if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 &&
        partition_none_allowed)
      pc_tree->vertical[0].pred_interp_filter =
          ctx->mic.mbmi.interp_filter;
    rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                     &pc_tree->vertical[0], best_rdc.rdcost);

    // Evaluate the right half only if the left half alone is still
    // competitive and the right half lies inside the picture.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols &&
        bsize > BLOCK_8X8) {
      update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0);
      encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize,
                        &pc_tree->vertical[0]);

      if (cpi->sf.adaptive_motion_search)
        load_pred_mv(x, ctx);
      rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step,
                       &this_rdc, subsize,
                       &pc_tree->vertical[1], best_rdc.rdcost - sum_rdc.rdcost);
      if (this_rdc.rate == INT_MAX) {
        sum_rdc.rdcost = INT64_MAX;
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
      sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                              sum_rdc.rate, sum_rdc.dist);
      if (sum_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = sum_rdc;
        pc_tree->partitioning = PARTITION_VERT;
      }
    }
    restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize);
  }

  // TODO(jbb): This code added so that we avoid static analysis
  // warning related to the fact that best_rd isn't used after this
  // point.  This code should be refactored so that the duplicate
  // checks occur in some sub function and thus are used...
  (void) best_rd;
  *rd_cost = best_rdc;

  // Encode the chosen partitioning unless no valid candidate was found or
  // this is the last quadrant (index 3) of its parent. Output is enabled only
  // at the 64x64 level.
  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX &&
      pc_tree->index != 3) {
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
              bsize, pc_tree);
  }

  if (bsize == BLOCK_64X64) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
2769 
// Encodes one row of 64x64 superblocks of a tile using the RD code path.
//
// For every superblock in [mi_col_start, mi_col_end) of the tile at row
// `mi_row`, a partitioning strategy is selected from the speed features,
// segmentation state and frame type. The superblock is then coded either via
// rd_use_partition() (a partitioning fixed up front) or via
// rd_pick_partition() (full partition search).
//
//   cpi        encoder instance.
//   td         thread data (macroblock state and partition trees).
//   tile_data  tile being encoded.
//   mi_row     row of the superblock row.
//   tp         token pointer advanced by the partition coders.
static void encode_rd_sb_row(VP9_COMP *cpi,
                             ThreadData *td,
                             TileDataEnc *tile_data,
                             int mi_row,
                             TOKENEXTRA **tp) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  SPEED_FEATURES *const sf = &cpi->sf;
  const struct segmentation *const seg = &cm->seg;
  int mi_col;

  // A new SB row starts with cleared left contexts.
  memset(&xd->left_context, 0, sizeof(xd->left_context));
  memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));

  for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
       mi_col += MI_BLOCK_SIZE) {
    MODE_INFO **const sb_mi =
        cm->mi_grid_visible + (cm->mi_stride * mi_row + mi_col);
    int unused_rate;
    int64_t unused_dist;
    RD_COST unused_rdc;
    int seg_skip = 0;
    int n;

    // Reset the predicted interpolation filter of the first 64 leaf and
    // partition-tree contexts before each superblock.
    if (sf->adaptive_pred_interp_filter) {
      for (n = 0; n < 64; ++n) {
        td->leaf_tree[n].pred_interp_filter = SWITCHABLE;
        td->pc_tree[n].vertical[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[n].vertical[1].pred_interp_filter = SWITCHABLE;
        td->pc_tree[n].horizontal[0].pred_interp_filter = SWITCHABLE;
        td->pc_tree[n].horizontal[1].pred_interp_filter = SWITCHABLE;
      }
    }

    vp9_zero(x->pred_mv);
    td->pc_root->index = 0;

    // Check whether the superblock's segment has the skip feature enabled.
    if (seg->enabled) {
      const uint8_t *const map = seg->update_map ? cpi->segmentation_map
                                                 : cm->last_frame_seg_map;
      const int segment_id =
          get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
      seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
    }

    x->source_variance = UINT_MAX;

    if (sf->partition_search_type == FIXED_PARTITION || seg_skip) {
      // Fixed block size; a skipped segment is always coded as one 64x64.
      const BLOCK_SIZE fixed_bsize =
          seg_skip ? BLOCK_64X64 : sf->always_this_block_size;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      set_fixed_partitioning(cpi, tile_info, sb_mi, mi_row, mi_col,
                             fixed_bsize);
    } else if (cpi->partition_search_skippable_frame) {
      // Fixed block size chosen by get_rd_var_based_fixed_partition().
      BLOCK_SIZE var_bsize;
      set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
      var_bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col);
      set_fixed_partitioning(cpi, tile_info, sb_mi, mi_row, mi_col, var_bsize);
    } else if (sf->partition_search_type == VAR_BASED_PARTITION &&
               cm->frame_type != KEY_FRAME) {
      choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
    } else {
      // Full RD partition search, optionally bounded by the automatically
      // derived min/max partition sizes.
      if (sf->auto_min_max_partition_size) {
        set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
        rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col,
                                &x->min_partition_size,
                                &x->max_partition_size);
      }
      rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64,
                        &unused_rdc, INT64_MAX, td->pc_root);
      continue;
    }

    // All remaining strategies code the superblock with the partitioning
    // that was set up above.
    rd_use_partition(cpi, td, tile_data, sb_mi, tp, mi_row, mi_col,
                     BLOCK_64X64, &unused_rate, &unused_dist, 1, td->pc_root);
  }
}
2854 
// Prepares the main thread's macroblock state for encoding a frame: points
// the source planes at cpi->Source, configures the block planes for the
// frame's chroma subsampling, and clears the above entropy and segmentation
// contexts.
static void init_encode_frame_mb_context(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &cpi->td.mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols);
  // Note: a single memset covers above_context[0], [1] and [2]; this assumes
  // the three planes are allocated as part of the same buffer.
  const size_t above_ctx_bytes =
      sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE;
  const size_t above_seg_bytes =
      sizeof(*xd->above_seg_context) * aligned_mi_cols;

  // Copy data over into macro block data structures.
  vp9_setup_src_planes(x, cpi->Source, 0, 0);
  vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y);

  memset(xd->above_context[0], 0, above_ctx_bytes);
  memset(xd->above_seg_context, 0, above_seg_bytes);
}
2874 
// Returns 1 when at least two of the LAST/GOLDEN/ALTREF reference flags are
// set in cpi->ref_frame_flags, and 0 otherwise. Always returns 0 when
// segment 1 has the SEG_LVL_REF_FRAME feature active.
static int check_dual_ref_flags(VP9_COMP *cpi) {
  const int ref_flags = cpi->ref_frame_flags;
  int enabled_refs = 0;

  if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) {
    return 0;
  }

  enabled_refs += (ref_flags & VP9_GOLD_FLAG) != 0;
  enabled_refs += (ref_flags & VP9_LAST_FLAG) != 0;
  enabled_refs += (ref_flags & VP9_ALT_FLAG) != 0;

  return enabled_refs >= 2;
}
2885 
// Clamps the transform size of every mode-info entry in the visible grid
// (mi_rows x mi_cols) to at most `max_tx_size`. Entries that are already
// within the limit are left untouched.
static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) {
  const int stride = cm->mi_stride;
  int row, col;

  for (row = 0; row < cm->mi_rows; ++row) {
    MODE_INFO **const row_mi = cm->mi_grid_visible + row * stride;

    for (col = 0; col < cm->mi_cols; ++col) {
      MB_MODE_INFO *const mbmi = &row_mi[col]->mbmi;

      if (mbmi->tx_size > max_tx_size) {
        mbmi->tx_size = max_tx_size;
      }
    }
  }
}
2898 
get_frame_type(const VP9_COMP * cpi)2899 static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) {
2900   if (frame_is_intra_only(&cpi->common))
2901     return INTRA_FRAME;
2902   else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame)
2903     return ALTREF_FRAME;
2904   else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)
2905     return GOLDEN_FRAME;
2906   else
2907     return LAST_FRAME;
2908 }
2909 
select_tx_mode(const VP9_COMP * cpi,MACROBLOCKD * const xd)2910 static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) {
2911   if (xd->lossless)
2912     return ONLY_4X4;
2913   if (cpi->common.frame_type == KEY_FRAME &&
2914       cpi->sf.use_nonrd_pick_mode)
2915     return ALLOW_16X16;
2916   if (cpi->sf.tx_size_search_method == USE_LARGESTALL)
2917     return ALLOW_32X32;
2918   else if (cpi->sf.tx_size_search_method == USE_FULL_RD||
2919            cpi->sf.tx_size_search_method == USE_TX_8X8)
2920     return TX_MODE_SELECT;
2921   else
2922     return cpi->common.tx_mode;
2923 }
2924 
// Intra mode selection that switches search routine by block size: blocks
// smaller than 16x16 go through vp9_rd_pick_intra_mode_sb() with no rd cost
// bound (INT64_MAX); blocks of 16x16 and larger use vp9_pick_intra_mode().
static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x,
                                     RD_COST *rd_cost, BLOCK_SIZE bsize,
                                     PICK_MODE_CONTEXT *ctx) {
  if (bsize >= BLOCK_16X16) {
    vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx);
    return;
  }

  vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX);
}
2933 
// Picks the prediction mode for one block of size `bsize` at
// (mi_row, mi_col) and reports its cost.
//   rd_cost: receives the cost produced by the selected mode search; it is
//            reset via vp9_rd_cost_reset() when the search reports a rate of
//            INT_MAX.
//   ctx:     mode context handed to the mode search; its rate/dist fields
//            are filled from rd_cost before returning.
static void nonrd_pick_sb_modes(VP9_COMP *cpi,
                                TileDataEnc *tile_data, MACROBLOCK *const x,
                                int mi_row, int mi_col, RD_COST *rd_cost,
                                BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const common = &cpi->common;
  MACROBLOCKD *const mbd = &x->e_mbd;
  MB_MODE_INFO *mode_info;

  // The mode-info pointer is only valid once the offsets have been set.
  set_offsets(cpi, &tile_data->tile_info, x, mi_row, mi_col, bsize);
  mode_info = &mbd->mi[0]->mbmi;
  mode_info->sb_type = bsize;

  // Blocks in a boosted cyclic-refresh segment use the refresh rdmult.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && common->seg.enabled &&
      cyclic_refresh_segment_id_boosted(mode_info->segment_id)) {
    x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh);
  }

  if (common->frame_type == KEY_FRAME) {
    hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx);
  } else if (segfeature_active(&common->seg, mode_info->segment_id,
                               SEG_LVL_SKIP)) {
    set_mode_info_seg_skip(x, common->tx_mode, rd_cost, bsize);
  } else if (bsize >= BLOCK_8X8) {
    vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col,
                        rd_cost, bsize, ctx);
  } else {
    vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col,
                               rd_cost, bsize, ctx);
  }

  duplicate_mode_info_in_sb(common, mbd, mi_row, mi_col, bsize);

  if (rd_cost->rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
  }

  ctx->rate = rd_cost->rate;
  ctx->dist = rd_cost->dist;
}
2969 
fill_mode_info_sb(VP9_COMMON * cm,MACROBLOCK * x,int mi_row,int mi_col,BLOCK_SIZE bsize,PC_TREE * pc_tree)2970 static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x,
2971                               int mi_row, int mi_col,
2972                               BLOCK_SIZE bsize,
2973                               PC_TREE *pc_tree) {
2974   MACROBLOCKD *xd = &x->e_mbd;
2975   int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
2976   PARTITION_TYPE partition = pc_tree->partitioning;
2977   BLOCK_SIZE subsize = get_subsize(bsize, partition);
2978 
2979   assert(bsize >= BLOCK_8X8);
2980 
2981   if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
2982     return;
2983 
2984   switch (partition) {
2985     case PARTITION_NONE:
2986       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
2987       *(xd->mi[0]) = pc_tree->none.mic;
2988       *(x->mbmi_ext) = pc_tree->none.mbmi_ext;
2989       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize);
2990       break;
2991     case PARTITION_VERT:
2992       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
2993       *(xd->mi[0]) = pc_tree->vertical[0].mic;
2994       *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext;
2995       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
2996 
2997       if (mi_col + hbs < cm->mi_cols) {
2998         set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs);
2999         *(xd->mi[0]) = pc_tree->vertical[1].mic;
3000         *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext;
3001         duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize);
3002       }
3003       break;
3004     case PARTITION_HORZ:
3005       set_mode_info_offsets(cm, x, xd, mi_row, mi_col);
3006       *(xd->mi[0]) = pc_tree->horizontal[0].mic;
3007       *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext;
3008       duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize);
3009       if (mi_row + hbs < cm->mi_rows) {
3010         set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col);
3011         *(xd->mi[0]) = pc_tree->horizontal[1].mic;
3012         *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext;
3013         duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize);
3014       }
3015       break;
3016     case PARTITION_SPLIT: {
3017       fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]);
3018       fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, subsize,
3019                         pc_tree->split[1]);
3020       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize,
3021                         pc_tree->split[2]);
3022       fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize,
3023                         pc_tree->split[3]);
3024       break;
3025     }
3026     default:
3027       break;
3028   }
3029 }
3030 
3031 // Reset the prediction pixel ready flag recursively.
pred_pixel_ready_reset(PC_TREE * pc_tree,BLOCK_SIZE bsize)3032 static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) {
3033   pc_tree->none.pred_pixel_ready = 0;
3034   pc_tree->horizontal[0].pred_pixel_ready = 0;
3035   pc_tree->horizontal[1].pred_pixel_ready = 0;
3036   pc_tree->vertical[0].pred_pixel_ready = 0;
3037   pc_tree->vertical[1].pred_pixel_ready = 0;
3038 
3039   if (bsize > BLOCK_8X8) {
3040     BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT);
3041     int i;
3042     for (i = 0; i < 4; ++i)
3043       pred_pixel_ready_reset(pc_tree->split[i], subsize);
3044   }
3045 }
3046 
// Searches the partitioning of the square block `bsize` at (mi_row, mi_col).
// PARTITION_NONE, PARTITION_SPLIT (recursively), PARTITION_HORZ and
// PARTITION_VERT are evaluated in that order, subject to the speed features
// and frame-edge constraints below; the cheapest candidate is recorded in
// pc_tree->partitioning and written back with fill_mode_info_sb().
//   tp:       token pointer passed through to recursion and encode_sb_rt().
//   rd_cost:  receives the best cost found, or a reset cost when no
//             candidate produced a valid rate.
//   do_recon: when non-zero the selected partitioning is encoded with
//             encode_sb_rt().
//   best_rd:  RD cost a candidate has to beat to be selected.
//   pc_tree:  partition tree node holding the per-partition mode contexts.
static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td,
                                 TileDataEnc *tile_data,
                                 TOKENEXTRA **tp, int mi_row,
                                 int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost,
                                 int do_recon, int64_t best_rd,
                                 PC_TREE *pc_tree) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  // Half of the block width, in the units of num_8x8_blocks_wide_lookup.
  const int ms = num_8x8_blocks_wide_lookup[bsize] / 2;
  TOKENEXTRA *tp_orig = *tp;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;
  int i;
  BLOCK_SIZE subsize = bsize;
  RD_COST this_rdc, sum_rdc, best_rdc;
  int do_split = bsize >= BLOCK_8X8;
  int do_rect = 1;
  // Override skipping rectangular partition operations for edge blocks
  const int force_horz_split = (mi_row + ms >= cm->mi_rows);
  const int force_vert_split = (mi_col + ms >= cm->mi_cols);
  const int xss = x->e_mbd.plane[1].subsampling_x;
  const int yss = x->e_mbd.plane[1].subsampling_y;

  int partition_none_allowed = !force_horz_split && !force_vert_split;
  int partition_horz_allowed = !force_vert_split && yss <= xss &&
                               bsize >= BLOCK_8X8;
  int partition_vert_allowed = !force_horz_split && xss <= yss &&
                               bsize >= BLOCK_8X8;
  // tp_orig is otherwise only read by the asserts at the end of the
  // function; presumably this keeps it referenced when asserts are compiled
  // out.
  (void) *tp_orig;

  // Only square block sizes are supported here.
  assert(num_8x8_blocks_wide_lookup[bsize] ==
             num_8x8_blocks_high_lookup[bsize]);

  vp9_rd_cost_init(&sum_rdc);
  vp9_rd_cost_reset(&best_rdc);
  // Seed the best cost with the caller's bound so only cheaper candidates win.
  best_rdc.rdcost = best_rd;

  // Determine partition types in search according to the speed features.
  // The threshold set here has to be of square block size.
  if (sf->auto_min_max_partition_size) {
    partition_none_allowed &= (bsize <= x->max_partition_size &&
                               bsize >= x->min_partition_size);
    partition_horz_allowed &= ((bsize <= x->max_partition_size &&
                                bsize > x->min_partition_size) ||
                                force_horz_split);
    partition_vert_allowed &= ((bsize <= x->max_partition_size &&
                                bsize > x->min_partition_size) ||
                                force_vert_split);
    do_split &= bsize > x->min_partition_size;
  }
  if (sf->use_square_partition_only) {
    partition_horz_allowed &= force_horz_split;
    partition_vert_allowed &= force_vert_split;
  }

  // The PARTITION_NONE context is flagged ready only when no other partition
  // type will be evaluated for this block.
  ctx->pred_pixel_ready = !(partition_vert_allowed ||
                            partition_horz_allowed ||
                            do_split);

  // PARTITION_NONE
  if (partition_none_allowed) {
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col,
                        &this_rdc, bsize, ctx);
    // Save the picked mode and skip state in the PARTITION_NONE context.
    ctx->mic.mbmi = xd->mi[0]->mbmi;
    ctx->mbmi_ext = *x->mbmi_ext;
    ctx->skip_txfm[0] = x->skip_txfm[0];
    ctx->skip = x->skip;

    if (this_rdc.rate != INT_MAX) {
      int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
      this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE];
      this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                              this_rdc.rate, this_rdc.dist);
      if (this_rdc.rdcost < best_rdc.rdcost) {
        int64_t dist_breakout_thr = sf->partition_search_breakout_dist_thr;
        int64_t rate_breakout_thr = sf->partition_search_breakout_rate_thr;

        // Scale the breakout thresholds by block size.
        dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] +
            b_height_log2_lookup[bsize]);

        rate_breakout_thr *= num_pels_log2_lookup[bsize];

        best_rdc = this_rdc;
        if (bsize >= BLOCK_8X8)
          pc_tree->partitioning = PARTITION_NONE;

        // Early termination: when both rate and distortion are below their
        // thresholds, no further partition types are searched.
        if (!x->e_mbd.lossless &&
            this_rdc.rate < rate_breakout_thr &&
            this_rdc.dist < dist_breakout_thr) {
          do_split = 0;
          do_rect = 0;
        }
      }
    }
  }

  // store estimated motion vector
  store_pred_mv(x, ctx);

  // PARTITION_SPLIT
  if (do_split) {
    int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
    sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT];
    sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist);
    subsize = get_subsize(bsize, PARTITION_SPLIT);
    // Stop visiting quadrants as soon as the running total is no longer
    // cheaper than the best candidate so far.
    for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) {
      const int x_idx = (i & 1) * ms;
      const int y_idx = (i >> 1) * ms;

      // Quadrants that start outside the frame are not searched.
      if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols)
        continue;
      load_pred_mv(x, ctx);
      // The remaining budget (best minus accumulated cost) bounds the
      // recursive search; reconstruction is disabled for sub-blocks.
      nonrd_pick_partition(cpi, td, tile_data, tp,
                           mi_row + y_idx, mi_col + x_idx,
                           subsize, &this_rdc, 0,
                           best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]);

      if (this_rdc.rate == INT_MAX) {
        // A quadrant without a valid result invalidates the whole split.
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost += this_rdc.rdcost;
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_SPLIT;
    } else {
      // skip rectangular partition test when larger block size
      // gives better rd cost
      if (sf->less_rectangular_check)
        do_rect &= !partition_none_allowed;
    }
  }

  // PARTITION_HORZ
  if (partition_horz_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_HORZ);
    if (sf->adaptive_motion_search)
      load_pred_mv(x, ctx);
    pc_tree->horizontal[0].pred_pixel_ready = 1;
    // sum_rdc is reused here and receives the cost of the first half.
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->horizontal[0]);

    pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
    pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->horizontal[0].skip = x->skip;

    // The second half is searched only if the first half alone is still
    // cheaper than the best candidate and the second half starts inside the
    // frame.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) {
      load_pred_mv(x, ctx);
      pc_tree->horizontal[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col,
                          &this_rdc, subsize,
                          &pc_tree->horizontal[1]);

      pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
      pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        // The partition signalling cost is folded into this_rdc.rate and
        // reaches sum_rdc through the addition below.
        this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_HORZ;
    } else {
      // Candidate rejected: clear the ready flags in the whole subtree.
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  // PARTITION_VERT
  if (partition_vert_allowed && do_rect) {
    subsize = get_subsize(bsize, PARTITION_VERT);
    if (sf->adaptive_motion_search)
      load_pred_mv(x, ctx);
    pc_tree->vertical[0].pred_pixel_ready = 1;
    // sum_rdc is reused here and receives the cost of the first half.
    nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize,
                        &pc_tree->vertical[0]);
    pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
    pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
    pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
    pc_tree->vertical[0].skip = x->skip;

    // Same gating as for PARTITION_HORZ, applied to the column direction.
    if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) {
      load_pred_mv(x, ctx);
      pc_tree->vertical[1].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms,
                          &this_rdc, subsize,
                          &pc_tree->vertical[1]);
      pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
      pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[1].skip = x->skip;

      if (this_rdc.rate == INT_MAX) {
        vp9_rd_cost_reset(&sum_rdc);
      } else {
        int pl = partition_plane_context(xd, mi_row, mi_col, bsize);
        sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT];
        sum_rdc.rate += this_rdc.rate;
        sum_rdc.dist += this_rdc.dist;
        sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv,
                                sum_rdc.rate, sum_rdc.dist);
      }
    }

    if (sum_rdc.rdcost < best_rdc.rdcost) {
      best_rdc = sum_rdc;
      pc_tree->partitioning = PARTITION_VERT;
    } else {
      // Candidate rejected: clear the ready flags in the whole subtree.
      pred_pixel_ready_reset(pc_tree, bsize);
    }
  }

  *rd_cost = best_rdc;

  // No candidate produced a valid rate: report a reset cost and leave the
  // mode-info grid untouched.
  if (best_rdc.rate == INT_MAX) {
    vp9_rd_cost_reset(rd_cost);
    return;
  }

  // update mode info array
  fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree);

  if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) {
    // Output is enabled only for the top-level 64x64 block.
    int output_enabled = (bsize == BLOCK_64X64);
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                 bsize, pc_tree);
  }

  // The token pointer is expected to advance only when a 64x64 block was
  // reconstructed.
  if (bsize == BLOCK_64X64 && do_recon) {
    assert(tp_orig < *tp);
    assert(best_rdc.rate < INT_MAX);
    assert(best_rdc.dist < INT64_MAX);
  } else {
    assert(tp_orig == *tp);
  }
}
3300 
// Adds the rate and distortion of `part` to `total`, but only when both
// costs are valid, i.e. neither carries the INT_MAX rate or INT64_MAX
// distortion sentinel. Otherwise `total` is left untouched.
static void nonrd_select_accum_rdc(RD_COST *total, const RD_COST *part) {
  if (part->rate != INT_MAX && part->dist != INT64_MAX &&
      total->rate != INT_MAX && total->dist != INT64_MAX) {
    total->rate += part->rate;
    total->dist += part->dist;
  }
}

// Refines a partitioning that is already stored in the mode-info grid.
// The partition type of the block at (mi_row, mi_col) is derived from
// mi[0]->mbmi.sb_type. Selected 32x32 and 16x16 configurations are
// re-searched with nonrd_pick_partition() inside a restricted
// [min_partition_size, max_partition_size] range; every other configuration
// keeps the stored partition type and only has its modes picked.
//   mi:             mode-info grid pointer for the block's top-left position.
//   tp:             token pointer passed through to the callees.
//   output_enabled: when non-zero and bsize is BLOCK_64X64, the block is
//                   encoded with encode_sb_rt() after the search.
//   rd_cost:        receives the accumulated rate/distortion of the block.
//                   It is not written when the block starts outside the
//                   frame.
//   pc_tree:        partition tree node holding the mode contexts.
static void nonrd_select_partition(VP9_COMP *cpi,
                                   ThreadData *td,
                                   TileDataEnc *tile_data,
                                   MODE_INFO **mi,
                                   TOKENEXTRA **tp,
                                   int mi_row, int mi_col,
                                   BLOCK_SIZE bsize, int output_enabled,
                                   RD_COST *rd_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  RD_COST this_rdc;

  vp9_rd_cost_reset(&this_rdc);
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  // The min/max partition sizes set below are read by
  // nonrd_pick_partition() when sf->auto_min_max_partition_size is enabled.
  if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_16X16;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                         rd_cost, 0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE &&
             subsize >= BLOCK_16X16) {
    x->max_partition_size = BLOCK_32X32;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                         rd_cost, 0, INT64_MAX, pc_tree);
  } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) {
    x->max_partition_size = BLOCK_16X16;
    x->min_partition_size = BLOCK_8X8;
    nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize,
                         rd_cost, 0, INT64_MAX, pc_tree);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        pc_tree->none.pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                            subsize, &pc_tree->none);
        pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
        pc_tree->none.mbmi_ext = *x->mbmi_ext;
        pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
        pc_tree->none.skip = x->skip;
        break;
      case PARTITION_VERT:
        pc_tree->vertical[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                            subsize, &pc_tree->vertical[0]);
        pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
        pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[0].skip = x->skip;
        // The right half is searched only if it starts inside the frame.
        if (mi_col + hbs < cm->mi_cols) {
          pc_tree->vertical[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                              &this_rdc, subsize, &pc_tree->vertical[1]);
          pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
          pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->vertical[1].skip = x->skip;
          nonrd_select_accum_rdc(rd_cost, &this_rdc);
        }
        break;
      case PARTITION_HORZ:
        pc_tree->horizontal[0].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost,
                            subsize, &pc_tree->horizontal[0]);
        pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
        pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[0].skip = x->skip;
        // The bottom half is searched only if it starts inside the frame.
        if (mi_row + hbs < cm->mi_rows) {
          pc_tree->horizontal[1].pred_pixel_ready = 1;
          nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                              &this_rdc, subsize, &pc_tree->horizontal[1]);
          pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
          pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
          pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
          pc_tree->horizontal[1].skip = x->skip;
          nonrd_select_accum_rdc(rd_cost, &this_rdc);
        }
        break;
      case PARTITION_SPLIT:
        subsize = get_subsize(bsize, PARTITION_SPLIT);
        // The first quadrant shares this block's origin, which is known to
        // be inside the frame, so it writes rd_cost directly.
        nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                               subsize, output_enabled, rd_cost,
                               pc_tree->split[0]);
        // A recursive call returns without writing its rd_cost argument when
        // the quadrant starts outside the frame. this_rdc is therefore reset
        // before every later quadrant so that a skipped quadrant cannot
        // re-add the previous quadrant's cost. (It is already in the reset
        // state for the second quadrant.)
        nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp,
                               mi_row, mi_col + hbs, subsize, output_enabled,
                               &this_rdc, pc_tree->split[1]);
        nonrd_select_accum_rdc(rd_cost, &this_rdc);

        vp9_rd_cost_reset(&this_rdc);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                               mi_row + hbs, mi_col, subsize, output_enabled,
                               &this_rdc, pc_tree->split[2]);
        nonrd_select_accum_rdc(rd_cost, &this_rdc);

        vp9_rd_cost_reset(&this_rdc);
        nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                               mi_row + hbs, mi_col + hbs, subsize,
                               output_enabled, &this_rdc, pc_tree->split[3]);
        nonrd_select_accum_rdc(rd_cost, &this_rdc);
        break;
      default:
        assert(0 && "Invalid partition type.");
        break;
    }
  }

  if (bsize == BLOCK_64X64 && output_enabled)
    encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree);
}
3438 
3439 
// Encodes the block at (mi_row, mi_col) using the partitioning already
// stored in the mode-info grid. The partition type is derived from
// mi[0]->mbmi.sb_type; for every resulting prediction block the mode is
// picked with nonrd_pick_sb_modes(), saved into the matching pc_tree
// context, and the block is encoded with encode_b_rt().
//   mi:             mode-info grid pointer for the block's top-left position.
//   tp:             token pointer passed through to encode_b_rt().
//   output_enabled: forwarded to encode_b_rt(); also gates the partition
//                   counter update.
//   dummy_cost:     scratch cost overwritten by each mode pick; no value is
//                   accumulated in it.
//   pc_tree:        partition tree node holding the mode contexts.
static void nonrd_use_partition(VP9_COMP *cpi,
                                ThreadData *td,
                                TileDataEnc *tile_data,
                                MODE_INFO **mi,
                                TOKENEXTRA **tp,
                                int mi_row, int mi_col,
                                BLOCK_SIZE bsize, int output_enabled,
                                RD_COST *dummy_cost, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4;
  const int mis = cm->mi_stride;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;

  // Blocks starting outside the frame are not coded.
  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols)
    return;

  subsize = (bsize >= BLOCK_8X8) ? mi[0]->mbmi.sb_type : BLOCK_4X4;
  partition = partition_lookup[bsl][subsize];

  // Count the partition type that is used for this block.
  if (output_enabled && bsize != BLOCK_4X4) {
    int ctx = partition_plane_context(xd, mi_row, mi_col, bsize);
    td->counts->partition[ctx][partition]++;
  }

  switch (partition) {
    case PARTITION_NONE:
      pc_tree->none.pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->none);
      // Save the picked mode and skip state before encoding the block.
      pc_tree->none.mic.mbmi = xd->mi[0]->mbmi;
      pc_tree->none.mbmi_ext = *x->mbmi_ext;
      pc_tree->none.skip_txfm[0] = x->skip_txfm[0];
      pc_tree->none.skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->none);
      break;
    case PARTITION_VERT:
      pc_tree->vertical[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->vertical[0]);
      pc_tree->vertical[0].mic.mbmi = xd->mi[0]->mbmi;
      pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->vertical[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->vertical[0]);
      // The right half is coded only for blocks larger than 8x8 whose right
      // half starts inside the frame.
      if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) {
        pc_tree->vertical[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs,
                            dummy_cost, subsize, &pc_tree->vertical[1]);
        pc_tree->vertical[1].mic.mbmi = xd->mi[0]->mbmi;
        pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->vertical[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs,
                    output_enabled, subsize, &pc_tree->vertical[1]);
      }
      break;
    case PARTITION_HORZ:
      pc_tree->horizontal[0].pred_pixel_ready = 1;
      nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                          subsize, &pc_tree->horizontal[0]);
      pc_tree->horizontal[0].mic.mbmi = xd->mi[0]->mbmi;
      pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext;
      pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0];
      pc_tree->horizontal[0].skip = x->skip;
      encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled,
                  subsize, &pc_tree->horizontal[0]);

      // The bottom half is coded only for blocks larger than 8x8 whose
      // bottom half starts inside the frame.
      if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) {
        pc_tree->horizontal[1].pred_pixel_ready = 1;
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col,
                            dummy_cost, subsize, &pc_tree->horizontal[1]);
        pc_tree->horizontal[1].mic.mbmi = xd->mi[0]->mbmi;
        pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext;
        pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0];
        pc_tree->horizontal[1].skip = x->skip;
        encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col,
                    output_enabled, subsize, &pc_tree->horizontal[1]);
      }
      break;
    case PARTITION_SPLIT:
      subsize = get_subsize(bsize, PARTITION_SPLIT);
      if (bsize == BLOCK_8X8) {
        // An 8x8 block split further is handled as a single pick/encode call
        // with the split sub-size and the leaf_split[0] context.
        nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost,
                            subsize, pc_tree->leaf_split[0]);
        encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col,
                    output_enabled, subsize, pc_tree->leaf_split[0]);
      } else {
        // Recurse into the four quadrants: top-left, top-right, bottom-left,
        // bottom-right. The mi pointer is advanced to match each quadrant.
        nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
                            subsize, output_enabled, dummy_cost,
                            pc_tree->split[0]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp,
                            mi_row, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[1]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp,
                            mi_row + hbs, mi_col, subsize, output_enabled,
                            dummy_cost, pc_tree->split[2]);
        nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp,
                            mi_row + hbs, mi_col + hbs, subsize, output_enabled,
                            dummy_cost, pc_tree->split[3]);
      }
      break;
    default:
      assert(0 && "Invalid partition type.");
      break;
  }

  // For a split above 8x8 the recursive calls update the partition context
  // of their own blocks, so it is only updated here in the remaining cases.
  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}
3555 
encode_nonrd_sb_row(VP9_COMP * cpi,ThreadData * td,TileDataEnc * tile_data,int mi_row,TOKENEXTRA ** tp)3556 static void encode_nonrd_sb_row(VP9_COMP *cpi,
3557                                 ThreadData *td,
3558                                 TileDataEnc *tile_data,
3559                                 int mi_row,
3560                                 TOKENEXTRA **tp) {
3561   SPEED_FEATURES *const sf = &cpi->sf;
3562   VP9_COMMON *const cm = &cpi->common;
3563   TileInfo *const tile_info = &tile_data->tile_info;
3564   MACROBLOCK *const x = &td->mb;
3565   MACROBLOCKD *const xd = &x->e_mbd;
3566   int mi_col;
3567 
3568   // Initialize the left context for the new SB row
3569   memset(&xd->left_context, 0, sizeof(xd->left_context));
3570   memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context));
3571 
3572   // Code each SB in the row
3573   for (mi_col = tile_info->mi_col_start; mi_col < tile_info->mi_col_end;
3574        mi_col += MI_BLOCK_SIZE) {
3575     const struct segmentation *const seg = &cm->seg;
3576     RD_COST dummy_rdc;
3577     const int idx_str = cm->mi_stride * mi_row + mi_col;
3578     MODE_INFO **mi = cm->mi_grid_visible + idx_str;
3579     PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type;
3580     BLOCK_SIZE bsize = BLOCK_64X64;
3581     int seg_skip = 0;
3582     x->source_variance = UINT_MAX;
3583     vp9_zero(x->pred_mv);
3584     vp9_rd_cost_init(&dummy_rdc);
3585     x->color_sensitivity[0] = 0;
3586     x->color_sensitivity[1] = 0;
3587 
3588     if (seg->enabled) {
3589       const uint8_t *const map = seg->update_map ? cpi->segmentation_map
3590                                                  : cm->last_frame_seg_map;
3591       int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col);
3592       seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP);
3593       if (seg_skip) {
3594         partition_search_type = FIXED_PARTITION;
3595       }
3596     }
3597 
3598     // Set the partition type of the 64X64 block
3599     switch (partition_search_type) {
3600       case VAR_BASED_PARTITION:
3601         // TODO(jingning, marpan): The mode decision and encoding process
3602         // support both intra and inter sub8x8 block coding for RTC mode.
3603         // Tune the thresholds accordingly to use sub8x8 block coding for
3604         // coding performance improvement.
3605         choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
3606         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3607                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3608         break;
3609       case SOURCE_VAR_BASED_PARTITION:
3610         set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col);
3611         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3612                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3613         break;
3614       case FIXED_PARTITION:
3615         if (!seg_skip)
3616           bsize = sf->always_this_block_size;
3617         set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize);
3618         nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3619                             BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3620         break;
3621       case REFERENCE_PARTITION:
3622         set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64);
3623         if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
3624             xd->mi[0]->mbmi.segment_id) {
3625           // Use lower max_partition_size for low resolutions.
3626           if (cm->width <= 352 && cm->height <= 288)
3627             x->max_partition_size = BLOCK_32X32;
3628           else
3629             x->max_partition_size = BLOCK_64X64;
3630           x->min_partition_size = BLOCK_8X8;
3631           nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col,
3632                                BLOCK_64X64, &dummy_rdc, 1,
3633                                INT64_MAX, td->pc_root);
3634         } else {
3635           choose_partitioning(cpi, tile_info, x, mi_row, mi_col);
3636           // TODO(marpan): Seems like nonrd_select_partition does not support
3637           // 4x4 partition. Since 4x4 is used on key frame, use this switch
3638           // for now.
3639           if (cm->frame_type == KEY_FRAME)
3640             nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3641                                 BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3642           else
3643             nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col,
3644                                    BLOCK_64X64, 1, &dummy_rdc, td->pc_root);
3645         }
3646 
3647         break;
3648       default:
3649         assert(0);
3650         break;
3651     }
3652   }
3653 }
3654 // end RTC play code
3655 
// Builds a histogram of the 16x16-block variance between the current source
// frame and the last source frame, and derives cpi->source_var_thresh from it.
//
// The sse/sum/var of every 16x16 block is written to consecutive entries of
// cpi->source_diff_var (one entry per block, in raster order).
//
// Returns 0 when a threshold was selected, -1 on an unsupported bit depth
// (high-bitdepth builds only), and sf->search_type_check_frequency when no
// threshold was selected (cpi->source_var_thresh is left at 0 in that case).
static int set_var_thresh_from_histogram(VP9_COMP *cpi) {
  const SPEED_FEATURES *const sf = &cpi->sf;
  const VP9_COMMON *const cm = &cpi->common;
  const int src_stride = cpi->Source->y_stride;
  const int last_stride = cpi->Last_Source->y_stride;
  // Start of the current row of 16x16 blocks in each luma buffer.
  const uint8_t *src_row = cpi->Source->y_buffer;
  const uint8_t *last_row = cpi->Last_Source->y_buffer;
  diff *blk = cpi->source_diff_var;
  DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]);
  int cutoff;
  int running = 0;
  int mb_row, mb_col, bin;

  // Pick the cutoff (a percentage of the block count) based on frame size.
  if (VPXMIN(cm->width, cm->height) >= 720)
    cutoff = cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100;
  else
    cutoff = cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100;

  memset(hist, 0, sizeof(hist));

  for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) {
    for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col, ++blk) {
      const uint8_t *const src = src_row + 16 * mb_col;
      const uint8_t *const last_src = last_row + 16 * mb_col;

#if CONFIG_VP9_HIGHBITDEPTH
      if (!cm->use_highbitdepth) {
        vpx_get16x16var(src, src_stride, last_src, last_stride,
                        &blk->sse, &blk->sum);
      } else {
        switch (cm->bit_depth) {
          case VPX_BITS_8:
            vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride,
                                     &blk->sse, &blk->sum);
            break;
          case VPX_BITS_10:
            vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride,
                                      &blk->sse, &blk->sum);
            break;
          case VPX_BITS_12:
            vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride,
                                      &blk->sse, &blk->sum);
            break;
          default:
            assert(0 && "cm->bit_depth should be VPX_BITS_8, VPX_BITS_10"
                   " or VPX_BITS_12");
            return -1;
        }
      }
#else
      vpx_get16x16var(src, src_stride, last_src, last_stride,
                      &blk->sse, &blk->sum);
#endif  // CONFIG_VP9_HIGHBITDEPTH

      // var = sse - sum^2 / 256 (a 16x16 block has 256 pixels).
      blk->var = blk->sse - (((uint32_t)blk->sum * blk->sum) >> 8);

      // Everything at or above VAR_HIST_MAX_BG_VAR lands in the last bin.
      ++hist[(blk->var >= VAR_HIST_MAX_BG_VAR) ? (VAR_HIST_BINS - 1)
                                               : (blk->var / VAR_HIST_FACTOR)];
    }

    // Advance both buffers by one row of 16x16 blocks.
    src_row += 16 * src_stride;
    last_row += 16 * last_stride;
  }

  cpi->source_var_thresh = 0;

  // Too many blocks in the top bin: no threshold is derived.
  if (hist[VAR_HIST_BINS - 1] >= cutoff)
    return sf->search_type_check_frequency;

  // The threshold is the upper edge of the first bin at which the cumulative
  // count exceeds the cutoff.
  for (bin = 0; bin < VAR_HIST_BINS - 1; ++bin) {
    running += hist[bin];
    if (running > cutoff) {
      cpi->source_var_thresh = (bin + 1) * VAR_HIST_FACTOR;
      return 0;
    }
  }

  return sf->search_type_check_frequency;
}
3739 
// Chooses sf->partition_search_type for the current frame when source
// variance based partitioning is in use.
//
//  - Key frames use SEARCH_PARTITION.
//  - Intra-only frames use FIXED_PARTITION.
//  - Otherwise cpi->source_diff_var is (re)allocated with one entry per MB
//    when needed, the variance threshold is refreshed once
//    cpi->frames_till_next_var_check has reached zero, and FIXED_PARTITION is
//    used while that counter is positive (the counter is decremented here).
static void source_var_based_partition_search_method(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;

  if (cm->frame_type == KEY_FRAME) {
    // For key frame, use SEARCH_PARTITION.
    sf->partition_search_type = SEARCH_PARTITION;
  } else if (cm->intra_only) {
    sf->partition_search_type = FIXED_PARTITION;
  } else {
    // Allocate when the frame size changed, and also when the buffer does not
    // exist yet: set_var_thresh_from_histogram() writes through
    // cpi->source_diff_var unconditionally, so it must never be NULL there.
    if (cpi->source_diff_var == NULL ||
        cm->last_width != cm->width || cm->last_height != cm->height) {
      if (cpi->source_diff_var) {
        vpx_free(cpi->source_diff_var);
        // Do not keep a dangling pointer around if the allocation below
        // fails and control leaves this function through the error handler.
        cpi->source_diff_var = NULL;
      }

      CHECK_MEM_ERROR(cm, cpi->source_diff_var,
                      vpx_calloc(cm->MBs, sizeof(diff)));
    }

    if (!cpi->frames_till_next_var_check)
      cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi);

    if (cpi->frames_till_next_var_check > 0) {
      sf->partition_search_type = FIXED_PARTITION;
      cpi->frames_till_next_var_check--;
    }
  }
}
3767 
// Returns 1 when the frame is a shown, non-key frame whose inter count (summed
// over all intra/inter contexts in td->counts) is more than four times its
// intra count; returns 0 otherwise.
static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) {
  unsigned int intra_total = 0;
  unsigned int inter_total = 0;
  int ctx;

  for (ctx = 0; ctx < INTRA_INTER_CONTEXTS; ++ctx) {
    intra_total += td->counts->intra_inter[ctx][0];
    inter_total += td->counts->intra_inter[ctx][1];
  }

  if (cm->frame_type == KEY_FRAME || !cm->show_frame)
    return 0;

  return (intra_total << 2) < inter_total;
}
3781 
// Prepares per-tile encoder state for the current frame.
//
// (Re)allocates cpi->tile_data when it is missing or holds fewer entries than
// the current tile count, and in that case resets every tile's
// thresh_freq_fact / mode_map tables. Then initializes each tile's TileInfo
// and assigns each tile its start position in the token buffer: every tile
// starts where the previous tile's allocated_tokens() region ends, beginning
// at cpi->tile_tok[0][0].
void vp9_init_tile_data(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int num_tiles = tile_cols * tile_rows;
  TOKENEXTRA *prev_start = cpi->tile_tok[0][0];
  int prev_size = 0;
  int tile_idx;

  if (cpi->tile_data == NULL || cpi->allocated_tiles < num_tiles) {
    if (cpi->tile_data != NULL)
      vpx_free(cpi->tile_data);
    CHECK_MEM_ERROR(cm, cpi->tile_data,
                    vpx_malloc(num_tiles * sizeof(*cpi->tile_data)));
    cpi->allocated_tiles = num_tiles;

    // Freshly allocated tiles start from the default tables.
    for (tile_idx = 0; tile_idx < num_tiles; ++tile_idx) {
      TileDataEnc *const tile_data = &cpi->tile_data[tile_idx];
      int bs, mode;
      for (bs = 0; bs < BLOCK_SIZES; ++bs) {
        for (mode = 0; mode < MAX_MODES; ++mode) {
          tile_data->thresh_freq_fact[bs][mode] = 32;
          tile_data->mode_map[bs][mode] = mode;
        }
      }
    }
  }

  // Tiles are visited in raster order (row-major).
  for (tile_idx = 0; tile_idx < num_tiles; ++tile_idx) {
    const int tile_row = tile_idx / tile_cols;
    const int tile_col = tile_idx % tile_cols;
    TileInfo *const tile_info = &cpi->tile_data[tile_idx].tile_info;

    vp9_tile_init(tile_info, cm, tile_row, tile_col);

    // This tile's tokens begin right after the previous tile's region.
    cpi->tile_tok[tile_row][tile_col] = prev_start + prev_size;
    prev_start = cpi->tile_tok[tile_row][tile_col];
    prev_size = allocated_tokens(*tile_info);
  }
}
3823 
// Encodes every superblock row of the tile at (tile_row, tile_col), using the
// non-RD or the RD row encoder depending on cpi->sf.use_nonrd_pick_mode, and
// records the number of tokens produced in cpi->tok_count[tile_row][tile_col].
void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td,
                     int tile_row, int tile_col) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col];
  const TileInfo *const tile_info = &this_tile->tile_info;
  // Write cursor into this tile's token region; advanced by the row encoders.
  TOKENEXTRA *tok = cpi->tile_tok[tile_row][tile_col];
  int mi_row = tile_info->mi_row_start;

  while (mi_row < tile_info->mi_row_end) {
    if (!cpi->sf.use_nonrd_pick_mode)
      encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok);
    else
      encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok);
    mi_row += MI_BLOCK_SIZE;
  }

  cpi->tok_count[tile_row][tile_col] =
      (unsigned int)(tok - cpi->tile_tok[tile_row][tile_col]);
  // The tile must not have produced more tokens than were reserved for it.
  assert(tok - cpi->tile_tok[tile_row][tile_col] <=
         allocated_tokens(*tile_info));
}
3846 
// Single-threaded tile encoding: initializes the per-tile data and then
// encodes every tile in raster order using cpi->td.
static void encode_tiles(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  const int num_tiles = tile_cols * tile_rows;
  int tile_idx;

  vp9_init_tile_data(cpi);

  for (tile_idx = 0; tile_idx < num_tiles; ++tile_idx)
    vp9_encode_tile(cpi, &cpi->td, tile_idx / tile_cols, tile_idx % tile_cols);
}
3859 
3860 #if CONFIG_FP_MB_STATS
// Locates the first-pass MB stats of the current frame: the position is
// mb_stats_start advanced by current_video_frame * MBs bytes.
//
// Returns 1 and stores the position in *this_frame_mb_stats, or returns EOF
// (leaving *this_frame_mb_stats untouched) when the position lies beyond
// mb_stats_end.
static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats,
                            VP9_COMMON *cm, uint8_t **this_frame_mb_stats) {
  uint8_t *const frame_stats =
      firstpass_mb_stats->mb_stats_start +
      cm->current_video_frame * cm->MBs * sizeof(uint8_t);

  if (frame_stats <= firstpass_mb_stats->mb_stats_end) {
    *this_frame_mb_stats = frame_stats;
    return 1;
  }

  return EOF;
}
3873 #endif
3874 
// Encodes one frame with the frame-level settings currently stored in cpi/cm.
//
// Resets the per-frame counters, selects the 4x4 forward/inverse transform
// functions (Walsh-Hadamard when lossless, DCT otherwise), initializes the
// quantizer, RD and motion-estimation constants, decides whether previous
// frame motion vectors may be used, and then encodes all tiles. The time
// spent encoding tiles is accumulated in cpi->time_encode_sb_row.
static void encode_frame_internal(VP9_COMP *cpi) {
  SPEED_FEATURES *const sf = &cpi->sf;
  ThreadData *const td = &cpi->td;
  MACROBLOCK *const x = &td->mb;
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCKD *const xd = &x->e_mbd;
  RD_COUNTS *const rdc = &cpi->td.rd_counts;

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  // Start the frame with clean statistics.
  vp9_zero(*td->counts);
  vp9_zero(rdc->coef_counts);
  vp9_zero(rdc->comp_pred_diff);
  vp9_zero(rdc->filter_diff);

  // Lossless requires a zero base q index and no delta q on any plane.
  xd->lossless = cm->base_qindex == 0 &&
                 cm->y_dc_delta_q == 0 &&
                 cm->uv_dc_delta_q == 0 &&
                 cm->uv_ac_delta_q == 0;

  if (xd->lossless) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth)
      x->fwd_txm4x4 = vp9_highbd_fwht4x4;
    else
      x->fwd_txm4x4 = vp9_fwht4x4;
    x->highbd_itxm_add = vp9_highbd_iwht4x4_add;
#else
    x->fwd_txm4x4 = vp9_fwht4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->itxm_add = vp9_iwht4x4_add;
    x->optimize = 0;
  } else {
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth)
      x->fwd_txm4x4 = vpx_highbd_fdct4x4;
    else
      x->fwd_txm4x4 = vpx_fdct4x4;
    x->highbd_itxm_add = vp9_highbd_idct4x4_add;
#else
    x->fwd_txm4x4 = vpx_fdct4x4;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    x->itxm_add = vp9_idct4x4_add;
  }

  cm->tx_mode = select_tx_mode(cpi, xd);

  vp9_frame_init_quantizer(cpi);

  vp9_initialize_rd_consts(cpi);
  vp9_initialize_me_consts(cpi, x, cm->base_qindex);
  init_encode_frame_mb_context(cpi);

  cm->use_prev_frame_mvs = !cm->error_resilient_mode &&
                           cm->width == cm->last_width &&
                           cm->height == cm->last_height &&
                           !cm->intra_only &&
                           cm->last_show_frame;

  // Special case: prev_mi is NULL when the previous mode info context cannot
  // be used.
  if (cm->use_prev_frame_mvs)
    cm->prev_mi = cm->prev_mip + cm->mi_stride + 1;
  else
    cm->prev_mi = NULL;

  x->quant_fp = cpi->sf.use_quant_fp;
  vp9_zero(x->skip_txfm);

  if (sf->use_nonrd_pick_mode) {
    // Initialize internal buffer pointers for rtc coding, where non-RD
    // mode decision is used and hence no buffer pointer swap needed.
    struct macroblock_plane *const p = x->plane;
    struct macroblockd_plane *const pd = xd->plane;
    PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none;
    int plane;

    for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
      p[plane].coeff = ctx->coeff_pbuf[plane][0];
      p[plane].qcoeff = ctx->qcoeff_pbuf[plane][0];
      pd[plane].dqcoeff = ctx->dqcoeff_pbuf[plane][0];
      p[plane].eobs = ctx->eobs_pbuf[plane][0];
    }
    vp9_zero(x->zcoeff_blk);

    // Drop the golden reference flag on non-key, non-SVC frames when
    // frames_since_golden is zero.
    if (cm->frame_type != KEY_FRAME &&
        cpi->rc.frames_since_golden == 0 &&
        !cpi->use_svc)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);

    if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION)
      source_var_based_partition_search_method(cpi);
  }

  {
    struct vpx_usec_timer emr_timer;
    vpx_usec_timer_start(&emr_timer);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm,
                       &cpi->twopass.this_frame_mb_stats);
    }
#endif

    // If allowed, encoding tiles in parallel with one thread handling one tile.
    if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1)
      vp9_encode_tiles_mt(cpi);
    else
      encode_tiles(cpi);

    vpx_usec_timer_mark(&emr_timer);
    cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer);
  }

  if (sf->skip_encode_sb)
    sf->skip_encode_frame = get_skip_encode_frame(cm, td);
  else
    sf->skip_encode_frame = 0;

#if 0
  // Keep record of the total distortion this time around for future use
  cpi->last_frame_distortion = cpi->frame_distortion;
#endif
}
3984 
get_interp_filter(const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS],int is_alt_ref)3985 static INTERP_FILTER get_interp_filter(
3986     const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) {
3987   if (!is_alt_ref &&
3988       threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] &&
3989       threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] &&
3990       threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) {
3991     return EIGHTTAP_SMOOTH;
3992   } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] &&
3993              threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) {
3994     return EIGHTTAP_SHARP;
3995   } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) {
3996     return EIGHTTAP;
3997   } else {
3998     return SWITCHABLE;
3999   }
4000 }
4001 
// Frame-level encode entry point.
//
// Decides whether compound inter prediction is allowed, picks the frame's
// reference mode and interpolation filter, encodes the frame, and (when
// cpi->sf.frame_parameter_update is set) updates the running thresholds and
// narrows reference_mode / tx_mode based on what the frame actually used.
void vp9_encode_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;

  // In the longer term the encoder should be generalized to match the
  // decoder such that we allow compound where one of the 3 buffers has a
  // different sign bias and that buffer is then the fixed ref. However, this
  // requires further work in the rd loop. For now the only supported encoder
  // side behavior is where the ALT ref buffer has opposite sign bias to
  // the other two.
  if (!frame_is_intra_only(cm)) {
    const int alt_matches_golden = cm->ref_frame_sign_bias[ALTREF_FRAME] ==
                                   cm->ref_frame_sign_bias[GOLDEN_FRAME];
    const int alt_matches_last = cm->ref_frame_sign_bias[ALTREF_FRAME] ==
                                 cm->ref_frame_sign_bias[LAST_FRAME];

    if (!alt_matches_golden && !alt_matches_last) {
      cpi->allow_comp_inter_inter = 1;
      cm->comp_fixed_ref = ALTREF_FRAME;
      cm->comp_var_ref[0] = LAST_FRAME;
      cm->comp_var_ref[1] = GOLDEN_FRAME;
    } else {
      cpi->allow_comp_inter_inter = 0;
    }
  }

  if (!cpi->sf.frame_parameter_update) {
    cm->reference_mode = SINGLE_REFERENCE;
    encode_frame_internal(cpi);
  } else {
    int i;
    RD_OPT *const rd_opt = &cpi->rd;
    FRAME_COUNTS *counts = cpi->td.counts;
    RD_COUNTS *const rdc = &cpi->td.rd_counts;

    // This code does a single RD pass over the whole frame assuming
    // either compound, single or hybrid prediction as per whatever has
    // worked best for that type of frame in the past.
    // It also predicts whether another coding mode would have worked
    // better than this coding mode. If that is the case, it remembers
    // that for subsequent frames.
    // It does the same analysis for transform size selection also.
    const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi);
    int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type];
    int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type];
    const int is_alt_ref = frame_type == ALTREF_FRAME;

    // Prediction (compound, single or hybrid) mode selection.
    if (is_alt_ref || !cpi->allow_comp_inter_inter) {
      cm->reference_mode = SINGLE_REFERENCE;
    } else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] &&
               mode_thrs[COMPOUND_REFERENCE] >
                   mode_thrs[REFERENCE_MODE_SELECT] &&
               check_dual_ref_flags(cpi) &&
               cpi->static_mb_pct == 100) {
      cm->reference_mode = COMPOUND_REFERENCE;
    } else if (mode_thrs[SINGLE_REFERENCE] >
               mode_thrs[REFERENCE_MODE_SELECT]) {
      cm->reference_mode = SINGLE_REFERENCE;
    } else {
      cm->reference_mode = REFERENCE_MODE_SELECT;
    }

    if (cm->interp_filter == SWITCHABLE)
      cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref);

    encode_frame_internal(cpi);

    // Each threshold becomes the average of its old value and this frame's
    // per-MB difference.
    for (i = 0; i < REFERENCE_MODES; ++i)
      mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2;

    for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i)
      filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2;

    // If the frame ended up using only one kind of reference, signal that
    // fixed mode instead of the per-block selection.
    if (cm->reference_mode == REFERENCE_MODE_SELECT) {
      int single_total = 0;
      int comp_total = 0;

      for (i = 0; i < COMP_INTER_CONTEXTS; i++) {
        single_total += counts->comp_inter[i][0];
        comp_total += counts->comp_inter[i][1];
      }

      if (comp_total == 0) {
        cm->reference_mode = SINGLE_REFERENCE;
        vp9_zero(counts->comp_inter);
      } else if (single_total == 0) {
        cm->reference_mode = COMPOUND_REFERENCE;
        vp9_zero(counts->comp_inter);
      }
    }

    // Likewise narrow the transform mode when only a subset of sizes was used.
    // The "_lp" totals count a transform size chosen in blocks whose largest
    // allowed transform is bigger; the "NxN_NxNp" totals count it in blocks
    // where it is the largest allowed.
    if (cm->tx_mode == TX_MODE_SELECT) {
      int count4x4 = 0;
      int count8x8_lp = 0, count8x8_8x8p = 0;
      int count16x16_16x16p = 0, count16x16_lp = 0;
      int count32x32 = 0;

      for (i = 0; i < TX_SIZE_CONTEXTS; ++i) {
        count4x4 += counts->tx.p32x32[i][TX_4X4];
        count4x4 += counts->tx.p16x16[i][TX_4X4];
        count4x4 += counts->tx.p8x8[i][TX_4X4];

        count8x8_lp += counts->tx.p32x32[i][TX_8X8];
        count8x8_lp += counts->tx.p16x16[i][TX_8X8];
        count8x8_8x8p += counts->tx.p8x8[i][TX_8X8];

        count16x16_16x16p += counts->tx.p16x16[i][TX_16X16];
        count16x16_lp += counts->tx.p32x32[i][TX_16X16];
        count32x32 += counts->tx.p32x32[i][TX_32X32];
      }

      if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 &&
          count32x32 == 0) {
        cm->tx_mode = ALLOW_8X8;
        reset_skip_tx_size(cm, TX_8X8);
      } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 &&
                 count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) {
        cm->tx_mode = ONLY_4X4;
        reset_skip_tx_size(cm, TX_4X4);
      } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_32X32;
      } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) {
        cm->tx_mode = ALLOW_16X16;
        reset_skip_tx_size(cm, TX_16X16);
      }
    }
  }
}
4125 
// Accumulates the intra mode statistics of one block into counts.
//
// Blocks of at least 8x8 add their y mode to the bucket given by
// size_group_lookup. Smaller blocks add the mode of each sub-block to
// bucket 0. The uv mode is always counted, indexed by the block's y mode.
static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) {
  const PREDICTION_MODE y_mode = mi->mbmi.mode;
  const PREDICTION_MODE uv_mode = mi->mbmi.uv_mode;
  const BLOCK_SIZE bsize = mi->mbmi.sb_type;

  if (bsize >= BLOCK_8X8) {
    ++counts->y_mode[size_group_lookup[bsize]][y_mode];
  } else {
    // Step through the 2x2 grid of 4x4 units by the sub-block dimensions.
    const int step_w = num_4x4_blocks_wide_lookup[bsize];
    const int step_h = num_4x4_blocks_high_lookup[bsize];
    int row, col;

    for (row = 0; row < 2; row += step_h) {
      for (col = 0; col < 2; col += step_w) {
        ++counts->y_mode[0][mi->bmi[row * 2 + col].as_mode];
      }
    }
  }

  ++counts->uv_mode[y_mode][uv_mode];
}
4144 
// Encodes and tokenizes the block at (mi_row, mi_col) using the mode decision
// already stored in xd->mi[0].
//
// Intra blocks are encoded plane by plane; inter blocks have their
// predictors built from the reference buffers and are then encoded as a
// whole. Block sizes below 8x8 are processed as BLOCK_8X8. When
// output_enabled is set, the frame counts (intra modes, transform sizes) are
// updated, the chosen transform size is written back to the covered mode
// info entries where needed, and cyclic-refresh state is updated when that
// AQ mode is active.
static void encode_superblock(VP9_COMP *cpi, ThreadData *td,
                              TOKENEXTRA **t, int output_enabled,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              PICK_MODE_CONTEXT *ctx) {
  VP9_COMMON *const cm = &cpi->common;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MODE_INFO **mi_8x8 = xd->mi;
  MODE_INFO *mi = mi_8x8[0];
  MB_MODE_INFO *mbmi = &mi->mbmi;
  const int seg_skip = segfeature_active(&cm->seg, mbmi->segment_id,
                                         SEG_LVL_SKIP);
  const int mis = cm->mi_stride;
  const int mi_width = num_8x8_blocks_wide_lookup[bsize];
  const int mi_height = num_8x8_blocks_high_lookup[bsize];
  // Size actually handed to the encode / tokenize routines.
  const BLOCK_SIZE coding_bsize = VPXMAX(bsize, BLOCK_8X8);

  x->skip_recode = !x->select_tx_size && mbmi->sb_type >= BLOCK_8X8 &&
                   cpi->oxcf.aq_mode != COMPLEXITY_AQ &&
                   cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ &&
                   cpi->sf.allow_skip_recode;

  if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode)
    memset(x->skip_txfm, 0, sizeof(x->skip_txfm));

  x->skip_optimize = ctx->is_coded;
  ctx->is_coded = 1;
  x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct;
  x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame &&
                    x->q_index < QIDX_SKIP_THRESH);

  if (x->skip_encode)
    return;

  if (is_inter_block(mbmi)) {
    const int is_compound = has_second_ref(mbmi);
    int ref;

    set_ref_ptrs(cm, xd, mbmi->ref_frame[0], mbmi->ref_frame[1]);
    for (ref = 0; ref < 1 + is_compound; ++ref) {
      YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi,
                                                     mbmi->ref_frame[ref]);
      assert(cfg != NULL);
      vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col,
                           &xd->block_refs[ref]->sf);
    }

    // The luma predictor is rebuilt unless reuse is enabled and the context
    // marks its prediction pixels as ready; a skip segment always rebuilds.
    if (!cpi->sf.reuse_inter_pred_sby || !ctx->pred_pixel_ready || seg_skip)
      vp9_build_inter_predictors_sby(xd, mi_row, mi_col, coding_bsize);

    vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, coding_bsize);

    vp9_encode_sb(x, coding_bsize);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, coding_bsize);
  } else {
    int plane;

    mbmi->skip = 1;
    for (plane = 0; plane < MAX_MB_PLANE; ++plane)
      vp9_encode_intra_block_plane(x, coding_bsize, plane);
    if (output_enabled)
      sum_intra_stats(td->counts, mi);
    vp9_tokenize_sb(cpi, td, t, !output_enabled, coding_bsize);
  }

  if (!output_enabled)
    return;

  if (cm->tx_mode == TX_MODE_SELECT &&
      mbmi->sb_type >= BLOCK_8X8 &&
      !(is_inter_block(mbmi) && (mbmi->skip || seg_skip))) {
    ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd),
                    &td->counts->tx)[mbmi->tx_size];
  } else {
    // Only mode info entries inside the frame are written.
    const int rows = VPXMIN(mi_height, cm->mi_rows - mi_row);
    const int cols = VPXMIN(mi_width, cm->mi_cols - mi_col);
    int row, col;
    TX_SIZE tx_size;

    // The new intra coding scheme requires no change of transform size
    if (is_inter_block(&mi->mbmi)) {
      tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode],
                       max_txsize_lookup[bsize]);
    } else if (bsize >= BLOCK_8X8) {
      tx_size = mbmi->tx_size;
    } else {
      tx_size = TX_4X4;
    }

    for (row = 0; row < rows; row++)
      for (col = 0; col < cols; col++)
        mi_8x8[mis * row + col]->mbmi.tx_size = tx_size;
  }

  ++td->counts->tx.tx_totals[mbmi->tx_size];
  ++td->counts->tx.tx_totals[get_uv_tx_size(mbmi, &xd->plane[1])];
  if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
    vp9_cyclic_refresh_update_sb_postencode(cpi, mbmi, mi_row, mi_col, bsize);
}
4236