1 /*
2 * Copyright (c) 2019, Alliance for Open Media. All rights reserved
3 *
4 * This source code is subject to the terms of the BSD 2 Clause License and
5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6 * was not distributed with this source code in the LICENSE file, you can
7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8 * Media Patent License 1.0 was not distributed with this source code in the
9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10 */
11
12 #include <limits.h>
13 #include <math.h>
14 #include <stdbool.h>
15 #include <stdio.h>
16
17 #include "config/aom_config.h"
18 #include "config/aom_dsp_rtcd.h"
19 #include "config/av1_rtcd.h"
20
21 #include "aom_dsp/aom_dsp_common.h"
22 #include "aom_dsp/binary_codes_writer.h"
23 #include "aom_ports/mem.h"
24 #include "aom_ports/aom_timer.h"
25 #include "aom_ports/system_state.h"
26
27 #include "av1/common/reconinter.h"
28 #include "av1/common/blockd.h"
29
30 #include "av1/encoder/encodeframe.h"
31 #include "av1/encoder/var_based_part.h"
32 #include "av1/encoder/reconinter_enc.h"
33
34 extern const uint8_t AV1_VAR_OFFS[];
35
typedef struct {
  // TODO(kyslov): consider changing to 64bit

  // This struct is used for computing variance in choose_partitioning(), where
  // the max number of samples within a superblock is 32x32 (with 4x4 avg).
  // With 8bit bitdepth, uint32_t is enough for sum_square_error (2^8 * 2^8 * 32
  // * 32 = 2^26). For high bitdepth we need to consider changing this to 64 bit
  uint32_t sum_square_error;  // Sum of squared sample differences.
  int32_t sum_error;          // Sum of sample differences (signed).
  int log2_count;             // log2 of the number of samples accumulated.
  int variance;               // Filled in lazily by get_variance().
} var;
48
// Variance statistics for one block and its two-way partitions:
// `none` covers the whole block, `horz` the top/bottom halves and
// `vert` the left/right halves (see fill_variance_tree()).
typedef struct {
  var none;
  var horz[2];
  var vert[2];
} partition_variance;
54
// Recursive variance tree: each vNxN node carries the partition variances for
// its own block plus four children covering the quadrants, down to 4x4 leaves
// (whose children are bare `var` entries rather than further nodes).
typedef struct {
  partition_variance part_variances;
  var split[4];
} v4x4;

typedef struct {
  partition_variance part_variances;
  v4x4 split[4];
} v8x8;

typedef struct {
  partition_variance part_variances;
  v8x8 split[4];
} v16x16;

typedef struct {
  partition_variance part_variances;
  v16x16 split[4];
} v32x32;

typedef struct {
  partition_variance part_variances;
  v32x32 split[4];
} v64x64;

typedef struct {
  partition_variance part_variances;
  v64x64 split[4];
} v128x128;

// Type-erased view of one tree level: the node's own partition variances and
// a pointer to the "none" variance of each of its four children. Populated by
// tree_to_node() so generic code can walk any level of the tree.
typedef struct {
  partition_variance *part_variances;
  var *split[4];
} variance_node;
89
// Build a generic variance_node view for the tree node `data`, whose concrete
// type (v128x128 ... v4x4) is selected by `bsize`. For sizes above 4x4 the
// child pointers reference each quadrant's own "none" variance; at 4x4 the
// children are the raw var leaves.
static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) {
  node->part_variances = NULL;
  switch (bsize) {
    case BLOCK_128X128: {
      v128x128 *tree = (v128x128 *)data;
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad].part_variances.none;
    } break;
    case BLOCK_64X64: {
      v64x64 *tree = (v64x64 *)data;
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad].part_variances.none;
    } break;
    case BLOCK_32X32: {
      v32x32 *tree = (v32x32 *)data;
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad].part_variances.none;
    } break;
    case BLOCK_16X16: {
      v16x16 *tree = (v16x16 *)data;
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad].part_variances.none;
    } break;
    case BLOCK_8X8: {
      v8x8 *tree = (v8x8 *)data;
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad].part_variances.none;
    } break;
    default: {
      // 4x4 leaves: the split entries are plain var structs.
      v4x4 *tree = (v4x4 *)data;
      assert(bsize == BLOCK_4X4);
      node->part_variances = &tree->part_variances;
      for (int quad = 0; quad < 4; ++quad)
        node->split[quad] = &tree->split[quad];
    } break;
  }
}
138
139 // Set variance values given sum square error, sum error, count.
fill_variance(uint32_t s2,int32_t s,int c,var * v)140 static void fill_variance(uint32_t s2, int32_t s, int c, var *v) {
141 v->sum_square_error = s2;
142 v->sum_error = s;
143 v->log2_count = c;
144 }
145
// Derive the (scaled-by-256) variance from the accumulated statistics:
// 256 * (E[x^2] - E[x]^2) computed with per-sample shifts by log2_count.
static void get_variance(var *v) {
  const uint32_t mean_sq =
      (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> v->log2_count);
  v->variance = (int)(256 * (v->sum_square_error - mean_sq) >> v->log2_count);
}
153
// Merge two sibling variance nodes (which must cover equal sample counts)
// into `r`; the combined node represents twice the samples, hence
// log2_count + 1.
static void sum_2_variances(const var *a, const var *b, var *r) {
  assert(a->log2_count == b->log2_count);
  const uint32_t total_sse = a->sum_square_error + b->sum_square_error;
  const int32_t total_se = a->sum_error + b->sum_error;
  fill_variance(total_sse, total_se, a->log2_count + 1, r);
}
159
// Populate one tree node's partition variances (horz/vert halves and the
// whole block) by summing its four child quadrants.
static void fill_variance_tree(void *data, BLOCK_SIZE bsize) {
  variance_node node;
  memset(&node, 0, sizeof(node));
  tree_to_node(data, bsize, &node);
  partition_variance *const pv = node.part_variances;
  // Horizontal halves: top pair (0,1) and bottom pair (2,3).
  sum_2_variances(node.split[0], node.split[1], &pv->horz[0]);
  sum_2_variances(node.split[2], node.split[3], &pv->horz[1]);
  // Vertical halves: left pair (0,2) and right pair (1,3).
  sum_2_variances(node.split[0], node.split[2], &pv->vert[0]);
  sum_2_variances(node.split[1], node.split[3], &pv->vert[1]);
  // Whole block = left half + right half.
  sum_2_variances(&pv->vert[0], &pv->vert[1], &pv->none);
}
171
// Record `bsize` as the chosen partition for the mode info at
// (mi_row, mi_col); positions outside the visible mi grid are ignored.
static void set_block_size(AV1_COMP *const cpi, MACROBLOCK *const x,
                           MACROBLOCKD *const xd, int mi_row, int mi_col,
                           BLOCK_SIZE bsize) {
  if (mi_col >= cpi->common.mi_cols || mi_row >= cpi->common.mi_rows) return;
  set_mode_info_offsets(cpi, x, xd, mi_row, mi_col);
  xd->mi[0]->sb_type = bsize;
}
180
// Decide the partitioning for the block at (mi_row, mi_col) of size `bsize`
// using the variance tree node `data`. Returns 1 when a partition (none,
// vert or horz) was selected and block sizes were written via
// set_block_size(); returns 0 to tell the caller to recurse into the four
// sub-blocks instead.
static int set_vt_partitioning(AV1_COMP *cpi, MACROBLOCK *const x,
                               MACROBLOCKD *const xd,
                               const TileInfo *const tile, void *data,
                               BLOCK_SIZE bsize, int mi_row, int mi_col,
                               int64_t threshold, BLOCK_SIZE bsize_min,
                               int force_split) {
  AV1_COMMON *const cm = &cpi->common;
  variance_node vt;
  const int block_width = mi_size_wide[bsize];
  const int block_height = mi_size_high[bsize];

  assert(block_height == block_width);
  tree_to_node(data, bsize, &vt);

  // A pre-computed force-split decision overrides all variance checks.
  if (force_split == 1) return 0;

  // Blocks extending past the tile must be split down to sizes that fit.
  if (mi_col + block_width > tile->mi_col_end ||
      mi_row + block_height > tile->mi_row_end)
    return 0;

  // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if
  // variance is below threshold, otherwise split will be selected.
  // No check for vert/horiz split as too few samples for variance.
  if (bsize == bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }
    return 0;
  } else if (bsize > bsize_min) {
    // Variance already computed to set the force_split.
    if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none);
    // For key frame: take split for bsize above 32X32 or very high variance.
    if (frame_is_intra_only(cm) &&
        (bsize > BLOCK_32X32 ||
         vt.part_variances->none.variance > (threshold << 4))) {
      return 0;
    }
    // If variance is low, take the bsize (no split).
    if (mi_col + block_width / 2 < cm->mi_cols &&
        mi_row + block_height / 2 < cm->mi_rows &&
        vt.part_variances->none.variance < threshold) {
      set_block_size(cpi, x, xd, mi_row, mi_col, bsize);
      return 1;
    }

    // Check vertical split: both halves must be below threshold and the
    // resulting chroma block size must be valid for the subsampling in use.
    if (mi_row + block_height / 2 < cm->mi_rows) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_VERT);
      get_variance(&vt.part_variances->vert[0]);
      get_variance(&vt.part_variances->vert[1]);
      if (vt.part_variances->vert[0].variance < threshold &&
          vt.part_variances->vert[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize);
        return 1;
      }
    }
    // Check horizontal split (same conditions as the vertical case).
    if (mi_col + block_width / 2 < cm->mi_cols) {
      BLOCK_SIZE subsize = get_partition_subsize(bsize, PARTITION_HORZ);
      get_variance(&vt.part_variances->horz[0]);
      get_variance(&vt.part_variances->horz[1]);
      if (vt.part_variances->horz[0].variance < threshold &&
          vt.part_variances->horz[1].variance < threshold &&
          get_plane_block_size(subsize, xd->plane[1].subsampling_x,
                               xd->plane[1].subsampling_y) < BLOCK_INVALID) {
        set_block_size(cpi, x, xd, mi_row, mi_col, subsize);
        set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize);
        return 1;
      }
    }

    return 0;
  }
  return 0;
}
264
// Fill the four 8x8 leaves of the 16x16 node `vst` with source-vs-prediction
// statistics computed from 8x8 block averages. On key frames the prediction
// average is a fixed 128 (d is not sampled). Sub-blocks outside the visible
// picture contribute zeros.
static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x16_idx, int y16_idx, v16x16 *vst,
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  for (int k = 0; k < 4; ++k) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    unsigned int sse = 0;
    int sum = 0;
    if (x8_idx < pixels_wide && y8_idx < pixels_high) {
      const int s_avg = aom_avg_8x8(s + y8_idx * sp + x8_idx, sp);
      const int d_avg =
          is_key_frame ? 128 : aom_avg_8x8(d + y8_idx * dp + x8_idx, dp);
      sum = s_avg - d_avg;
      sse = (unsigned int)(sum * sum);
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}
287
// Return the spread of per-8x8 (max - min) source/prediction differences
// across the four 8x8 sub-blocks of a 16x16 block; large values indicate an
// inhomogeneous block that may deserve further splitting.
static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d,
                              int dp, int x16_idx, int y16_idx, int pixels_wide,
                              int pixels_high) {
  int minmax_max = 0;
  int minmax_min = 255;
  // Loop over the 4 8x8 subblocks, skipping any outside the picture.
  for (int k = 0; k < 4; ++k) {
    const int x8_idx = x16_idx + ((k & 1) << 3);
    const int y8_idx = y16_idx + ((k >> 1) << 3);
    if (x8_idx >= pixels_wide || y8_idx >= pixels_high) continue;
    int min = 0;
    int max = 0;
    aom_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp,
                   &min, &max);
    const int range = max - min;
    if (range > minmax_max) minmax_max = range;
    if (range < minmax_min) minmax_min = range;
  }
  return (minmax_max - minmax_min);
}
309
// Fill the four 4x4 leaves of the 8x8 node `vst` with source-vs-prediction
// statistics computed from 4x4 block averages; the 4x4-sampled counterpart of
// fill_variance_8x8avg(). On key frames the prediction average is a fixed
// 128. Sub-blocks outside the visible picture contribute zeros.
static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d,
                                 int dp, int x8_idx, int y8_idx, v8x8 *vst,
                                 int pixels_wide, int pixels_high,
                                 int is_key_frame) {
  for (int k = 0; k < 4; ++k) {
    const int x4_idx = x8_idx + ((k & 1) << 2);
    const int y4_idx = y8_idx + ((k >> 1) << 2);
    unsigned int sse = 0;
    int sum = 0;
    if (x4_idx < pixels_wide && y4_idx < pixels_high) {
      const int s_avg = aom_avg_4x4(s + y4_idx * sp + x4_idx, sp);
      const int d_avg =
          is_key_frame ? 128 : aom_avg_4x4(d + y4_idx * dp + x4_idx, dp);
      sum = s_avg - d_avg;
      sse = (unsigned int)(sum * sum);
    }
    fill_variance(sse, sum, 0, &vst->split[k].part_variances.none);
  }
}
331
scale_part_thresh_sumdiff(int64_t threshold_base,int speed,int width,int height,int content_state)332 static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed,
333 int width, int height,
334 int content_state) {
335 if (speed >= 8) {
336 if (width <= 640 && height <= 480)
337 return (5 * threshold_base) >> 2;
338 else if ((content_state == kLowSadLowSumdiff) ||
339 (content_state == kHighSadLowSumdiff) ||
340 (content_state == kLowVarHighSumdiff))
341 return (5 * threshold_base) >> 2;
342 } else if (speed == 7) {
343 if ((content_state == kLowSadLowSumdiff) ||
344 (content_state == kHighSadLowSumdiff) ||
345 (content_state == kLowVarHighSumdiff)) {
346 return (5 * threshold_base) >> 2;
347 }
348 }
349 return threshold_base;
350 }
351
// Set the variance split thresholds for following the block sizes:
// 0 - threshold_128x128, 1 - threshold_64x64, 2 - threshold_32x32,
// 3 - vbp_threshold_16x16. 4 - vbp_threshold_8x8 (to split to 4x4 partition) is
// currently only used on key frame.
// NOTE(review): on non-key frames only thresholds[1..3] are written here;
// thresholds[0] and thresholds[4] keep whatever values the caller stored.
static void set_vbp_thresholds(AV1_COMP *cpi, int64_t thresholds[], int q,
                               int content_state) {
  AV1_COMMON *const cm = &cpi->common;
  const int is_key_frame = frame_is_intra_only(cm);
  // Key frames use a 40x base threshold relative to the y dequant step.
  const int threshold_multiplier = is_key_frame ? 40 : 1;
  int64_t threshold_base =
      (int64_t)(threshold_multiplier * cpi->dequants.y_dequant_QTX[q][1]);

  if (is_key_frame) {
    thresholds[0] = threshold_base;
    thresholds[1] = threshold_base;
    thresholds[2] = threshold_base >> 2;
    thresholds[3] = threshold_base >> 2;
    thresholds[4] = threshold_base << 2;
  } else {
    // Increase base variance threshold based on content_state/sum_diff level.
    threshold_base = scale_part_thresh_sumdiff(
        threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state);

    thresholds[1] = threshold_base;
    thresholds[3] = threshold_base << cpi->oxcf.speed;
    if (cm->width >= 1280 && cm->height >= 720)
      thresholds[3] = thresholds[3] << 1;
    // Resolution-dependent tuning of the 32x32 and 16x16 thresholds.
    if (cm->width <= 352 && cm->height <= 288) {
      thresholds[1] = threshold_base >> 3;
      thresholds[2] = threshold_base >> 1;
      thresholds[3] = threshold_base << 3;
    } else if (cm->width < 1280 && cm->height < 720) {
      thresholds[2] = (5 * threshold_base) >> 2;
    } else if (cm->width < 1920 && cm->height < 1080) {
      thresholds[2] = threshold_base << 1;
      thresholds[3] <<= 2;
    } else {
      thresholds[2] = (5 * threshold_base) >> 1;
    }
  }
}
393
// Refresh the encoder-level variance-partition thresholds for quantizer `q`.
// A no-op unless variance-based partition search is selected.
void av1_set_variance_partition_thresholds(AV1_COMP *cpi, int q,
                                           int content_state) {
  AV1_COMMON *const cm = &cpi->common;
  SPEED_FEATURES *const sf = &cpi->sf;
  const int is_key_frame = frame_is_intra_only(cm);
  if (sf->partition_search_type != VAR_BASED_PARTITION) return;

  set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state);
  // The thresholds below are not changed locally.
  if (is_key_frame) {
    cpi->vbp_threshold_sad = 0;
    cpi->vbp_threshold_copy = 0;
    cpi->vbp_bsize_min = BLOCK_8X8;
  } else {
    const int low_res = (cm->width <= 352 && cm->height <= 288);
    if (low_res) {
      cpi->vbp_threshold_sad = 10;
    } else {
      // SAD threshold scales with the dequant step, floored at 1000.
      const int sad_thr = cpi->dequants.y_dequant_QTX[q][1] << 1;
      cpi->vbp_threshold_sad = (sad_thr > 1000) ? sad_thr : 1000;
    }
    cpi->vbp_bsize_min = BLOCK_16X16;
    if (low_res) {
      cpi->vbp_threshold_copy = 4000;
    } else if (cm->width <= 640 && cm->height <= 360) {
      cpi->vbp_threshold_copy = 8000;
    } else {
      // Copy threshold also scales with the dequant step, floored at 8000.
      const int copy_thr = cpi->dequants.y_dequant_QTX[q][1] << 3;
      cpi->vbp_threshold_copy = (copy_thr > 8000) ? copy_thr : 8000;
    }
  }
  cpi->vbp_threshold_minmax = 15 + (q >> 3);
}
429
// This function chooses partitioning based on the variance between source and
// reconstructed last, where variance is computed for down-sampled inputs.
// Returns 0 on success (the partition sizes are written into the mode info
// grid via set_block_size()).
// TODO(kyslov): lot of things. Bring back noise estimation, brush up partition
// selection and most of all - retune the thresholds
int av1_choose_var_based_partitioning(AV1_COMP *cpi, const TileInfo *const tile,
                                      MACROBLOCK *x, int mi_row, int mi_col) {
  AV1_COMMON *const cm = &cpi->common;
  MACROBLOCKD *xd = &x->e_mbd;

  int i, j, k, m;
  v128x128 *vt;
  v16x16 *vt2 = NULL;
  // Per-node split decisions for the whole tree; see the index map below.
  unsigned char force_split[85];
  int avg_32x32;
  int max_var_32x32 = 0;
  int min_var_32x32 = INT_MAX;
  int var_32x32;
  int var_64x64;
  int min_var_64x64 = INT_MAX;
  int max_var_64x64 = 0;
  int avg_16x16[4];
  int maxvar_16x16[4];
  int minvar_16x16[4];
  int64_t threshold_4x4avg;
  int content_state = 0;
  uint8_t *s;
  const uint8_t *d;
  int sp;
  int dp;
  int compute_minmax_variance = 1;
  int is_key_frame = frame_is_intra_only(cm);
  int pixels_wide = 128, pixels_high = 128;
  assert(cm->seq_params.sb_size == BLOCK_64X64 ||
         cm->seq_params.sb_size == BLOCK_128X128);
  const int is_small_sb = (cm->seq_params.sb_size == BLOCK_64X64);
  const int num_64x64_blocks = is_small_sb ? 1 : 4;

  CHECK_MEM_ERROR(cm, vt, aom_calloc(1, sizeof(*vt)));

  // Local copy of the thresholds; re-derived below for the current qindex.
  int64_t thresholds[5] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1],
                            cpi->vbp_thresholds[2], cpi->vbp_thresholds[3],
                            cpi->vbp_thresholds[4] };

  const int low_res = (cm->width <= 352 && cm->height <= 288);
  int variance4x4downsample[64];
  int segment_id;
  const int num_planes = av1_num_planes(cm);

  segment_id = xd->mi[0]->segment_id;

  set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state);

  if (is_small_sb) {
    pixels_wide = 64;
    pixels_high = 64;
  }

  // For non keyframes, disable 4x4 average for low resolution when speed = 8
  threshold_4x4avg = INT64_MAX;

  // Clamp the sampled area to the visible part of the frame (edges are in
  // 1/8-pel units, hence the >> 3).
  if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3);
  if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3);

  s = x->plane[0].src.buf;
  sp = x->plane[0].src.stride;

  // Index for force_split: 0 for the whole superblock, 1-4 for the 64x64
  // blocks (index m + 1), 5-20 for the 32x32 blocks (index 5 + m2 + i),
  // 21-84 for the 16x16 blocks (index 21 + i2 + j).
  force_split[0] = 0;

  if (!is_key_frame) {
    // TODO(kyslov): we are assuming that the ref is LAST_FRAME! Check if it
    // is!!
    MB_MODE_INFO *mi = xd->mi[0];
    const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_yv12_buf(cm, LAST_FRAME);

    assert(yv12 != NULL);

    av1_setup_pre_planes(xd, 0, yv12, mi_row, mi_col,
                         get_ref_scale_factors(cm, LAST_FRAME), num_planes);
    mi->ref_frame[0] = LAST_FRAME;
    mi->ref_frame[1] = NONE_FRAME;
    mi->sb_type = cm->seq_params.sb_size;
    mi->mv[0].as_int = 0;
    mi->interp_filters = av1_make_interp_filters(BILINEAR, BILINEAR);
    // Coarse motion search (only for superblocks fully inside the frame).
    if (xd->mb_to_right_edge >= 0 && xd->mb_to_bottom_edge >= 0) {
      const MV dummy_mv = { 0, 0 };
      av1_int_pro_motion_estimation(cpi, x, cm->seq_params.sb_size, mi_row,
                                    mi_col, &dummy_mv);
    }

// TODO(kyslov): bring the small SAD functionality back
#if 0
    y_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, x->plane[0].src.stride,
                                   xd->plane[0].pre[0].buf,
                                   xd->plane[0].pre[0].stride);
#endif
    x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv;

    set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]);
    av1_enc_build_inter_predictor(cm, xd, mi_row, mi_col, NULL,
                                  cm->seq_params.sb_size, AOM_PLANE_Y,
                                  AOM_PLANE_Y);

    d = xd->plane[0].dst.buf;
    dp = xd->plane[0].dst.stride;

// If the y_sad is very small, take 64x64 as partition and exit.
// Don't check on boosted segment for now, as 64x64 is suppressed there.
#if 0
    if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad)
    { const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const
    int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; if (mi_col +
    block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows)
    { set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_128X128);
      x->variance_low[0] = 1;
      return 0;
    }
    }
#endif
  } else {
    // Key frame: compare against a flat reference of constant values.
    d = AV1_VAR_OFFS;
    dp = 0;
  }

  if (low_res && threshold_4x4avg < INT64_MAX)
    CHECK_MEM_ERROR(cm, vt2, aom_calloc(64, sizeof(*vt2)));
  // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances
  // for splits.
  for (m = 0; m < num_64x64_blocks; m++) {
    const int x64_idx = ((m & 1) << 6);
    const int y64_idx = ((m >> 1) << 6);
    const int m2 = m << 2;
    force_split[m + 1] = 0;
    for (i = 0; i < 4; i++) {
      const int x32_idx = x64_idx + ((i & 1) << 5);
      const int y32_idx = y64_idx + ((i >> 1) << 5);
      const int i2 = (m2 + i) << 2;
      force_split[5 + m2 + i] = 0;
      avg_16x16[i] = 0;
      maxvar_16x16[i] = 0;
      minvar_16x16[i] = INT_MAX;
      for (j = 0; j < 4; j++) {
        const int x16_idx = x32_idx + ((j & 1) << 4);
        const int y16_idx = y32_idx + ((j >> 1) << 4);
        const int split_index = 21 + i2 + j;
        v16x16 *vst = &vt->split[m].split[i].split[j];
        force_split[split_index] = 0;
        variance4x4downsample[i2 + j] = 0;
        if (!is_key_frame) {
          fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, pixels_wide,
                               pixels_high, is_key_frame);
          fill_variance_tree(&vt->split[m].split[i].split[j], BLOCK_16X16);
          get_variance(&vt->split[m].split[i].split[j].part_variances.none);
          // Track average and min/max of 16x16 variances per 32x32 block.
          avg_16x16[i] +=
              vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance <
              minvar_16x16[i])
            minvar_16x16[i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              maxvar_16x16[i])
            maxvar_16x16[i] =
                vt->split[m].split[i].split[j].part_variances.none.variance;
          if (vt->split[m].split[i].split[j].part_variances.none.variance >
              thresholds[3]) {
            // 16X16 variance is above threshold for split, so force split to
            // 8x8 for this 16x16 block (this also forces splits for upper
            // levels).
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          } else if (compute_minmax_variance &&
                     vt->split[m]
                             .split[i]
                             .split[j]
                             .part_variances.none.variance > thresholds[2] &&
                     !cyclic_refresh_segment_id_boosted(segment_id)) {
            // We have some nominal amount of 16x16 variance (based on average),
            // compute the minmax over the 8x8 sub-blocks, and if above
            // threshold, force split to 8x8 block for this 16x16 block.
            int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx,
                                            pixels_wide, pixels_high);
            int thresh_minmax = (int)cpi->vbp_threshold_minmax;
            if (minmax > thresh_minmax) {
              force_split[split_index] = 1;
              force_split[5 + m2 + i] = 1;
              force_split[m + 1] = 1;
              force_split[0] = 1;
            }
          }
        }
        if (is_key_frame) {
          force_split[split_index] = 0;
          // Go down to 4x4 down-sampling for variance.
          variance4x4downsample[i2 + j] = 1;
          for (k = 0; k < 4; k++) {
            int x8_idx = x16_idx + ((k & 1) << 3);
            int y8_idx = y16_idx + ((k >> 1) << 3);
            v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k];
            fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2,
                                 pixels_wide, pixels_high, is_key_frame);
          }
        }
      }
    }
  }

  // Fill the rest of the variance tree by summing split partition values.
  for (m = 0; m < num_64x64_blocks; ++m) {
    avg_32x32 = 0;
    const int m2 = m << 2;
    for (i = 0; i < 4; i++) {
      const int i2 = (m2 + i) << 2;
      for (j = 0; j < 4; j++) {
        const int split_index = 21 + i2 + j;
        if (variance4x4downsample[i2 + j] == 1) {
          v16x16 *vtemp =
              (!is_key_frame) ? &vt2[i2 + j] : &vt->split[m].split[i].split[j];
          for (k = 0; k < 4; k++)
            fill_variance_tree(&vtemp->split[k], BLOCK_8X8);
          fill_variance_tree(vtemp, BLOCK_16X16);
          // If variance of this 16x16 block is above the threshold, force block
          // to split. This also forces a split on the upper levels.
          get_variance(&vtemp->part_variances.none);
          if (vtemp->part_variances.none.variance > thresholds[3]) {
            force_split[split_index] = 1;
            force_split[5 + m2 + i] = 1;
            force_split[m + 1] = 1;
            force_split[0] = 1;
          }
        }
      }
      fill_variance_tree(&vt->split[m].split[i], BLOCK_32X32);
      // If variance of this 32x32 block is above the threshold, or if its above
      // (some threshold of) the average variance over the sub-16x16 blocks,
      // then force this block to split. This also forces a split on the upper
      // (64x64) level.
      if (!force_split[5 + m2 + i]) {
        get_variance(&vt->split[m].split[i].part_variances.none);
        var_32x32 = vt->split[m].split[i].part_variances.none.variance;
        max_var_32x32 = AOMMAX(var_32x32, max_var_32x32);
        min_var_32x32 = AOMMIN(var_32x32, min_var_32x32);
        if (vt->split[m].split[i].part_variances.none.variance >
                thresholds[2] ||
            (!is_key_frame &&
             vt->split[m].split[i].part_variances.none.variance >
                 (thresholds[2] >> 1) &&
             vt->split[m].split[i].part_variances.none.variance >
                 (avg_16x16[i] >> 1))) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        } else if (!is_key_frame && cm->height <= 360 &&
                   (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[2] >> 1) &&
                   maxvar_16x16[i] > thresholds[2]) {
          force_split[5 + m2 + i] = 1;
          force_split[m + 1] = 1;
          force_split[0] = 1;
        }
        avg_32x32 += var_32x32;
      }
    }
    if (!force_split[1 + m]) {
      fill_variance_tree(&vt->split[m], BLOCK_64X64);
      get_variance(&vt->split[m].part_variances.none);
      var_64x64 = vt->split[m].part_variances.none.variance;
      max_var_64x64 = AOMMAX(var_64x64, max_var_64x64);
      min_var_64x64 = AOMMIN(var_64x64, min_var_64x64);
      // If variance of this 64x64 block is above (some threshold of) the
      // average variance over the sub-32x32 blocks, then force this block to
      // split. Only checking this for noise level >= medium for now.

      if (!is_key_frame &&
          (max_var_32x32 - min_var_32x32) > 3 * (thresholds[1] >> 3) &&
          max_var_32x32 > thresholds[1] >> 1)
        force_split[1 + m] = 1;
    }
    // A 64x64 superblock has no 128x128 level; always mark the root split.
    if (is_small_sb) force_split[0] = 1;
  }

  if (!force_split[0]) {
    fill_variance_tree(vt, BLOCK_128X128);
    get_variance(&vt->part_variances.none);
    if (!is_key_frame &&
        (max_var_64x64 - min_var_64x64) > 3 * (thresholds[0] >> 3) &&
        max_var_64x64 > thresholds[0] >> 1)
      force_split[0] = 1;
  }

  // Walk the tree top-down; at each level either take the partition decided
  // by set_vt_partitioning() or recurse into the four sub-blocks. Offsets in
  // this phase are in mi units (hence the smaller shifts).
  if (!set_vt_partitioning(cpi, x, xd, tile, vt, BLOCK_128X128, mi_row, mi_col,
                           thresholds[0], BLOCK_16X16, force_split[0])) {
    for (m = 0; m < num_64x64_blocks; ++m) {
      const int x64_idx = ((m & 1) << 4);
      const int y64_idx = ((m >> 1) << 4);
      const int m2 = m << 2;

      // Now go through the entire structure, splitting every block size until
      // we get to one that's got a variance lower than our threshold.
      if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m], BLOCK_64X64,
                               mi_row + y64_idx, mi_col + x64_idx,
                               thresholds[1], BLOCK_16X16,
                               force_split[1 + m])) {
        for (i = 0; i < 4; ++i) {
          const int x32_idx = ((i & 1) << 3);
          const int y32_idx = ((i >> 1) << 3);
          const int i2 = (m2 + i) << 2;
          if (!set_vt_partitioning(cpi, x, xd, tile, &vt->split[m].split[i],
                                   BLOCK_32X32, (mi_row + y64_idx + y32_idx),
                                   (mi_col + x64_idx + x32_idx), thresholds[2],
                                   BLOCK_16X16, force_split[5 + m2 + i])) {
            for (j = 0; j < 4; ++j) {
              const int x16_idx = ((j & 1) << 2);
              const int y16_idx = ((j >> 1) << 2);
              const int split_index = 21 + i2 + j;
              // For inter frames: if variance4x4downsample[] == 1 for this
              // 16x16 block, then the variance is based on 4x4 down-sampling,
              // so use vt2 in set_vt_partioning(), otherwise use vt.
              v16x16 *vtemp =
                  (!is_key_frame && variance4x4downsample[i2 + j] == 1)
                      ? &vt2[i2 + j]
                      : &vt->split[m].split[i].split[j];
              if (!set_vt_partitioning(cpi, x, xd, tile, vtemp, BLOCK_16X16,
                                       mi_row + y64_idx + y32_idx + y16_idx,
                                       mi_col + x64_idx + x32_idx + x16_idx,
                                       thresholds[3], BLOCK_8X8,
                                       force_split[split_index])) {
                for (k = 0; k < 4; ++k) {
                  const int x8_idx = (k & 1) << 1;
                  const int y8_idx = (k >> 1) << 1;
                  set_block_size(
                      cpi, x, xd,
                      (mi_row + y64_idx + y32_idx + y16_idx + y8_idx),
                      (mi_col + x64_idx + x32_idx + x16_idx + x8_idx),
                      BLOCK_8X8);
                }
              }
            }
          }
        }
      }
    }
  }

  if (vt2) aom_free(vt2);
  if (vt) aom_free(vt);
  return 0;
}
779