1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19 
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 
23 #include "av1/common/av1_common_int.h"
24 #include "av1/common/blockd.h"
25 #include "av1/common/mvref_common.h"
26 #include "av1/common/obmc.h"
27 #include "av1/common/reconinter.h"
28 #include "av1/common/reconintra.h"
29 
30 // This function will determine whether or not to create a warped
31 // prediction.
av1_allow_warp(const MB_MODE_INFO * const mbmi,const WarpTypesAllowed * const warp_types,const WarpedMotionParams * const gm_params,int build_for_obmc,const struct scale_factors * const sf,WarpedMotionParams * final_warp_params)32 int av1_allow_warp(const MB_MODE_INFO *const mbmi,
33                    const WarpTypesAllowed *const warp_types,
34                    const WarpedMotionParams *const gm_params,
35                    int build_for_obmc, const struct scale_factors *const sf,
36                    WarpedMotionParams *final_warp_params) {
37   // Note: As per the spec, we must test the fixed point scales here, which are
38   // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
39   // have 1 << 10 precision).
40   if (av1_is_scaled(sf)) return 0;
41 
42   if (final_warp_params != NULL) *final_warp_params = default_warp_params;
43 
44   if (build_for_obmc) return 0;
45 
46   if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
47     if (final_warp_params != NULL)
48       memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
49     return 1;
50   } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
51     if (final_warp_params != NULL)
52       memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
53     return 1;
54   }
55 
56   return 0;
57 }
58 
// Fills in an InterPredParams structure for a translational prediction of a
// block_width x block_height block at pixel position (pix_row, pix_col).
// Callers later upgrade mode/comp_mode via av1_init_warp_params and
// av1_init_comp_mode as needed.
void av1_init_inter_params(InterPredParams *inter_pred_params, int block_width,
                           int block_height, int pix_row, int pix_col,
                           int subsampling_x, int subsampling_y, int bit_depth,
                           int use_hbd_buf, int is_intrabc,
                           const struct scale_factors *sf,
                           const struct buf_2d *ref_buf,
                           int_interpfilters interp_filters) {
  InterPredParams *const p = inter_pred_params;

  // Block geometry.
  p->block_width = block_width;
  p->block_height = block_height;
  p->pix_row = pix_row;
  p->pix_col = pix_col;

  // Pixel format of the plane being predicted.
  p->subsampling_x = subsampling_x;
  p->subsampling_y = subsampling_y;
  p->bit_depth = bit_depth;
  p->use_hbd_buf = use_hbd_buf;
  p->is_intrabc = is_intrabc;

  // Reference data.
  p->scale_factors = sf;
  p->ref_frame_buf = *ref_buf;

  // Defaults: plain translational, single-reference prediction.
  p->mode = TRANSLATION_PRED;
  p->comp_mode = UNIFORM_SINGLE;

  if (is_intrabc) {
    // Intra block copy uses its dedicated filter in both directions.
    p->interp_filter_params[0] = &av1_intrabc_filter_params;
    p->interp_filter_params[1] = &av1_intrabc_filter_params;
  } else {
    // Pick horizontal/vertical subpel filters appropriate for the block size.
    p->interp_filter_params[0] = av1_get_interp_filter_params_with_block_size(
        interp_filters.as_filters.x_filter, block_width);
    p->interp_filter_params[1] = av1_get_interp_filter_params_with_block_size(
        interp_filters.as_filters.y_filter, block_height);
  }
}
92 
// Switches an already-initialized InterPredParams to uniform compound mode
// (two references blended with uniform weighting).
void av1_init_comp_mode(InterPredParams *inter_pred_params) {
  inter_pred_params->comp_mode = UNIFORM_COMP;
}
96 
// Upgrades inter_pred_params->mode to WARP_PRED when warped motion is legal
// and available for this block, filling warp_params with the chosen model.
void av1_init_warp_params(InterPredParams *inter_pred_params,
                          const WarpTypesAllowed *warp_types, int ref,
                          const MACROBLOCKD *xd, const MB_MODE_INFO *mi) {
  // Warping requires at least an 8x8 block and subpel motion precision.
  if (inter_pred_params->block_width < 8 ||
      inter_pred_params->block_height < 8 ||
      xd->cur_frame_force_integer_mv) {
    return;
  }

  const int use_warp =
      av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                     /*build_for_obmc=*/0, inter_pred_params->scale_factors,
                     &inter_pred_params->warp_params);
  if (use_warp) inter_pred_params->mode = WARP_PRED;
}
110 
// Configures inter_pred_params for masked (wedge / difference-weighted)
// compound prediction of the given block size.
void av1_init_mask_comp(InterPredParams *inter_pred_params, BLOCK_SIZE bsize,
                        const INTERINTER_COMPOUND_DATA *mask_comp) {
  inter_pred_params->sb_type = bsize;
  inter_pred_params->mask_comp = *mask_comp;

  // Only the second prediction of the pair (compound_index == 1) performs the
  // masked blend; it must not also average into the destination buffer.
  const int is_second_pred =
      inter_pred_params->conv_params.compound_index == 1;
  if (is_second_pred) {
    inter_pred_params->conv_params.do_average = 0;
    inter_pred_params->comp_mode = MASK_COMP;
  }
}
121 
// Produces an inter prediction for one block: a warped prediction via
// av1_warp_plane when mode == WARP_PRED, otherwise a translational
// subpel-filtered prediction (high- or low-bitdepth variant).
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride,
                              InterPredParams *inter_pred_params,
                              const SubpelParams *subpel_params) {
  // Compound predictions accumulate into conv_params.dst, which must exist.
  assert(IMPLIES(inter_pred_params->conv_params.is_compound,
                 inter_pred_params->conv_params.dst != NULL));

  // TODO(jingning): av1_warp_plane() can be further cleaned up.
  if (inter_pred_params->mode == WARP_PRED) {
    av1_warp_plane(
        &inter_pred_params->warp_params, inter_pred_params->use_hbd_buf,
        inter_pred_params->bit_depth, inter_pred_params->ref_frame_buf.buf0,
        inter_pred_params->ref_frame_buf.width,
        inter_pred_params->ref_frame_buf.height,
        inter_pred_params->ref_frame_buf.stride, dst,
        inter_pred_params->pix_col, inter_pred_params->pix_row,
        inter_pred_params->block_width, inter_pred_params->block_height,
        dst_stride, inter_pred_params->subsampling_x,
        inter_pred_params->subsampling_y, &inter_pred_params->conv_params);
  } else if (inter_pred_params->mode == TRANSLATION_PRED) {
#if CONFIG_AV1_HIGHBITDEPTH
    // High-bitdepth builds pick the predictor matching the buffer type.
    if (inter_pred_params->use_hbd_buf) {
      highbd_inter_predictor(
          src, src_stride, dst, dst_stride, subpel_params,
          inter_pred_params->scale_factors, inter_pred_params->block_width,
          inter_pred_params->block_height, &inter_pred_params->conv_params,
          inter_pred_params->interp_filter_params,
          inter_pred_params->bit_depth);
    } else {
      inter_predictor(
          src, src_stride, dst, dst_stride, subpel_params,
          inter_pred_params->scale_factors, inter_pred_params->block_width,
          inter_pred_params->block_height, &inter_pred_params->conv_params,
          inter_pred_params->interp_filter_params);
    }
#else
    inter_predictor(
        src, src_stride, dst, dst_stride, subpel_params,
        inter_pred_params->scale_factors, inter_pred_params->block_width,
        inter_pred_params->block_height, &inter_pred_params->conv_params,
        inter_pred_params->interp_filter_params);
#endif
  }
}
166 
// 1-D profiles of length MASK_MASTER_SIZE (values in [0, 64], i.e.
// 1 << WEDGE_WEIGHT_BITS) used by init_wedge_master_masks() to seed the
// oblique and vertical master wedge masks. The odd/even oblique variants are
// interleaved row by row to approximate the slanted wedge edge.
static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
  37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
  46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
  43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
  64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
185 
// Copies `width` bytes of `src` into `dst`, displaced horizontally by `shift`
// samples (positive = right, negative = left). Samples shifted in from
// outside the row are filled by replicating the nearest edge sample.
// Precondition: |shift| <= width (holds for all callers, which use at most
// MASK_MASTER_SIZE / 4). Asserted to catch out-of-bounds memcpy/memset sizes.
static inline void shift_copy(const uint8_t *src, uint8_t *dst, int shift,
                              int width) {
  assert(abs(shift) <= width);
  if (shift >= 0) {
    // Right shift: replicate the first sample into the leading gap.
    memcpy(dst + shift, src, width - shift);
    memset(dst, src[0], shift);
  } else {
    // Left shift: replicate the last sample into the trailing gap.
    shift = -shift;
    memcpy(dst, src + shift, width - shift);
    memset(dst + width - shift, src[width - 1], shift);
  }
}
197 
/* clang-format off */
// Per-(block size, wedge index) sign flip applied when fetching a master
// mask: a 1 selects the complement mask. Indexed
// [BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]; rows marked "not used" correspond to
// block sizes without wedge support (see av1_wedge_params_lookup).
DECLARE_ALIGNED(16, static uint8_t,
                wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
  { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
};
/* clang-format on */
225 
// Master wedge masks, indexed [negative][direction]; filled once by
// init_wedge_master_masks().
DECLARE_ALIGNED(
    16, static uint8_t,
    wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);

// Backing storage for all per-block-size wedge masks.
// 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
// on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
DECLARE_ALIGNED(16, static uint8_t,
                wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);

// Precomputed smooth interintra blending masks per (mode, block size);
// filled once by init_smooth_interintra_masks().
DECLARE_ALIGNED(16, static uint8_t,
                smooth_interintra_mask_buf[INTERINTRA_MODES][BLOCK_SIZES_ALL]
                                          [MAX_WEDGE_SQUARE]);

// Per-block-size pointers into wedge_mask_buf; filled by init_wedge_masks().
static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
241 
// Wedge codebooks: 16 (direction, x_offset, y_offset) entries per shape
// class; the offsets are expressed in eighths of the block width/height
// (see get_wedge_mask_inplace). "hgtw" = height greater than width.
static const wedge_code_type wedge_codebook_16_hgtw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// "hltw" = height less than width.
static const wedge_code_type wedge_codebook_16_hltw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
  { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};

// "heqw" = height equal to width (square blocks).
static const wedge_code_type wedge_codebook_16_heqw[16] = {
  { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
  { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
  { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
  { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
  { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
  { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
  { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
  { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
};
274 
// Wedge support per block size: number of wedge types, codebook, sign-flip
// row, and output mask pointers. Entries of { 0, NULL, NULL, NULL } mark
// block sizes where wedge compound prediction is not available.
const wedge_params_type av1_wedge_params_lookup[BLOCK_SIZES_ALL] = {
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
    wedge_masks[BLOCK_8X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
    wedge_masks[BLOCK_8X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
    wedge_masks[BLOCK_16X8] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
    wedge_masks[BLOCK_16X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
    wedge_masks[BLOCK_16X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
    wedge_masks[BLOCK_32X16] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
    wedge_masks[BLOCK_32X32] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
    wedge_masks[BLOCK_8X32] },
  { MAX_WEDGE_TYPES, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
    wedge_masks[BLOCK_32X8] },
  { 0, NULL, NULL, NULL },
  { 0, NULL, NULL, NULL },
};
308 
get_wedge_mask_inplace(int wedge_index,int neg,BLOCK_SIZE sb_type)309 static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
310                                              BLOCK_SIZE sb_type) {
311   const uint8_t *master;
312   const int bh = block_size_high[sb_type];
313   const int bw = block_size_wide[sb_type];
314   const wedge_code_type *a =
315       av1_wedge_params_lookup[sb_type].codebook + wedge_index;
316   int woff, hoff;
317   const uint8_t wsignflip =
318       av1_wedge_params_lookup[sb_type].signflip[wedge_index];
319 
320   assert(wedge_index >= 0 && wedge_index < get_wedge_types_lookup(sb_type));
321   woff = (a->x_offset * bw) >> 3;
322   hoff = (a->y_offset * bh) >> 3;
323   master = wedge_mask_obl[neg ^ wsignflip][a->direction] +
324            MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
325            MASK_MASTER_SIZE / 2 - woff;
326   return master;
327 }
328 
av1_get_compound_type_mask(const INTERINTER_COMPOUND_DATA * const comp_data,BLOCK_SIZE sb_type)329 const uint8_t *av1_get_compound_type_mask(
330     const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
331   assert(is_masked_compound_type(comp_data->type));
332   (void)sb_type;
333   switch (comp_data->type) {
334     case COMPOUND_WEDGE:
335       return av1_get_contiguous_soft_mask(comp_data->wedge_index,
336                                           comp_data->wedge_sign, sb_type);
337     case COMPOUND_DIFFWTD: return comp_data->seg_mask;
338     default: assert(0); return NULL;
339   }
340 }
341 
diffwtd_mask_d16(uint8_t * mask,int which_inverse,int mask_base,const CONV_BUF_TYPE * src0,int src0_stride,const CONV_BUF_TYPE * src1,int src1_stride,int h,int w,ConvolveParams * conv_params,int bd)342 static AOM_INLINE void diffwtd_mask_d16(
343     uint8_t *mask, int which_inverse, int mask_base, const CONV_BUF_TYPE *src0,
344     int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
345     ConvolveParams *conv_params, int bd) {
346   int round =
347       2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);
348   int i, j, m, diff;
349   for (i = 0; i < h; ++i) {
350     for (j = 0; j < w; ++j) {
351       diff = abs(src0[i * src0_stride + j] - src1[i * src1_stride + j]);
352       diff = ROUND_POWER_OF_TWO(diff, round);
353       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
354       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
355     }
356   }
357 }
358 
// C reference implementation: dispatches to diffwtd_mask_d16 for the two
// supported mask types, which share base weight 38 and differ only in
// whether the resulting weights are inverted.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_d16(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_d16(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w,
                     conv_params, bd);
  } else {
    assert(0);
  }
}
375 
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)376 static AOM_INLINE void diffwtd_mask(uint8_t *mask, int which_inverse,
377                                     int mask_base, const uint8_t *src0,
378                                     int src0_stride, const uint8_t *src1,
379                                     int src1_stride, int h, int w) {
380   int i, j, m, diff;
381   for (i = 0; i < h; ++i) {
382     for (j = 0; j < w; ++j) {
383       diff =
384           abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
385       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
386       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
387     }
388   }
389 }
390 
// C reference implementation: dispatches to diffwtd_mask for the two
// supported mask types (base weight 38, plain or inverted).
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask(mask, 0, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask(mask, 1, 38, src0, src0_stride, src1, src1_stride, h, w);
  } else {
    assert(0);
  }
}
406 
// Builds a difference-weighted mask from two high-bitdepth (uint16_t)
// predictions. The four near-identical loops deliberately hoist the
// (bd == 8) and which_inverse branches out of the per-pixel work.
static AOM_FORCE_INLINE void diffwtd_mask_highbd(
    uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
    int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
    const unsigned int bd) {
  assert(bd >= 8);
  if (bd == 8) {
    // 8-bit content: differences need no normalization shift.
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  } else {
    // Higher bit depths: scale differences back to an 8-bit range first.
    const unsigned int bd_shift = bd - 8;
    if (which_inverse) {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    } else {
      for (int i = 0; i < h; ++i) {
        for (int j = 0; j < w; ++j) {
          int diff =
              (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
          unsigned int m = negative_to_zero(mask_base + diff);
          m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
          mask[j] = m;
        }
        src0 += src0_stride;
        src1 += src1_stride;
        mask += w;
      }
    }
  }
}
469 
// C reference implementation for high-bitdepth buffers: converts the packed
// pointers and dispatches to diffwtd_mask_highbd (base weight 38, plain or
// inverted).
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  if (mask_type == DIFFWTD_38) {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else if (mask_type == DIFFWTD_38_INV) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    assert(0);
  }
}
486 
init_wedge_master_masks()487 static AOM_INLINE void init_wedge_master_masks() {
488   int i, j;
489   const int w = MASK_MASTER_SIZE;
490   const int h = MASK_MASTER_SIZE;
491   const int stride = MASK_MASTER_STRIDE;
492   // Note: index [0] stores the masters, and [1] its complement.
493   // Generate prototype by shifting the masters
494   int shift = h / 4;
495   for (i = 0; i < h; i += 2) {
496     shift_copy(wedge_master_oblique_even,
497                &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
498                MASK_MASTER_SIZE);
499     shift--;
500     shift_copy(wedge_master_oblique_odd,
501                &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
502                MASK_MASTER_SIZE);
503     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
504            wedge_master_vertical,
505            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
506     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
507            wedge_master_vertical,
508            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
509   }
510 
511   for (i = 0; i < h; ++i) {
512     for (j = 0; j < w; ++j) {
513       const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
514       wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
515       wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
516           wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
517               (1 << WEDGE_WEIGHT_BITS) - msk;
518       wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
519           wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
520               (1 << WEDGE_WEIGHT_BITS) - msk;
521       wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
522           wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
523       const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
524       wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
525       wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
526           wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
527               (1 << WEDGE_WEIGHT_BITS) - mskx;
528     }
529   }
530 }
531 
init_wedge_masks()532 static AOM_INLINE void init_wedge_masks() {
533   uint8_t *dst = wedge_mask_buf;
534   BLOCK_SIZE bsize;
535   memset(wedge_masks, 0, sizeof(wedge_masks));
536   for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
537     const wedge_params_type *wedge_params = &av1_wedge_params_lookup[bsize];
538     const int wtypes = wedge_params->wedge_types;
539     if (wtypes == 0) continue;
540     const uint8_t *mask;
541     const int bw = block_size_wide[bsize];
542     const int bh = block_size_high[bsize];
543     int w;
544     for (w = 0; w < wtypes; ++w) {
545       mask = get_wedge_mask_inplace(w, 0, bsize);
546       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
547                         bh);
548       wedge_params->masks[0][w] = dst;
549       dst += bw * bh;
550 
551       mask = get_wedge_mask_inplace(w, 1, bsize);
552       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
553                         bh);
554       wedge_params->masks[1][w] = dst;
555       dst += bw * bh;
556     }
557     assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
558   }
559 }
560 
/* clang-format off */
// 1-D interintra blending weights (out of 64), sampled by distance from the
// intra prediction edge; shared across block sizes via ii_size_scales.
static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
  60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
  31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
  16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
  8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
  4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
};
// Per-block-size step used when walking ii_weights1d (indexed by BLOCK_SIZE).
// NOTE(review): only read within this view; could likely be declared const —
// confirm nothing else in this file writes to it.
static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
    32, 16, 16, 16, 8, 8, 8, 4,
    4,  4,  2,  2,  2, 1, 1, 1,
    8,  8,  4,  4,  2, 2
};
/* clang-format on */
577 
build_smooth_interintra_mask(uint8_t * mask,int stride,BLOCK_SIZE plane_bsize,INTERINTRA_MODE mode)578 static AOM_INLINE void build_smooth_interintra_mask(uint8_t *mask, int stride,
579                                                     BLOCK_SIZE plane_bsize,
580                                                     INTERINTRA_MODE mode) {
581   int i, j;
582   const int bw = block_size_wide[plane_bsize];
583   const int bh = block_size_high[plane_bsize];
584   const int size_scale = ii_size_scales[plane_bsize];
585 
586   switch (mode) {
587     case II_V_PRED:
588       for (i = 0; i < bh; ++i) {
589         memset(mask, ii_weights1d[i * size_scale], bw * sizeof(mask[0]));
590         mask += stride;
591       }
592       break;
593 
594     case II_H_PRED:
595       for (i = 0; i < bh; ++i) {
596         for (j = 0; j < bw; ++j) mask[j] = ii_weights1d[j * size_scale];
597         mask += stride;
598       }
599       break;
600 
601     case II_SMOOTH_PRED:
602       for (i = 0; i < bh; ++i) {
603         for (j = 0; j < bw; ++j)
604           mask[j] = ii_weights1d[(i < j ? i : j) * size_scale];
605         mask += stride;
606       }
607       break;
608 
609     case II_DC_PRED:
610     default:
611       for (i = 0; i < bh; ++i) {
612         memset(mask, 32, bw * sizeof(mask[0]));
613         mask += stride;
614       }
615       break;
616   }
617 }
618 
init_smooth_interintra_masks()619 static AOM_INLINE void init_smooth_interintra_masks() {
620   for (int m = 0; m < INTERINTRA_MODES; ++m) {
621     for (int bs = 0; bs < BLOCK_SIZES_ALL; ++bs) {
622       const int bw = block_size_wide[bs];
623       const int bh = block_size_high[bs];
624       if (bw > MAX_WEDGE_SIZE || bh > MAX_WEDGE_SIZE) continue;
625       build_smooth_interintra_mask(smooth_interintra_mask_buf[m][bs], bw, bs,
626                                    m);
627     }
628   }
629 }
630 
631 // Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
// One-time initialization of all wedge and interintra mask tables; must run
// before any masked compound or interintra prediction is built.
// Fixes the unprototyped `()` declarator to `(void)` (CERT DCL20-C).
void av1_init_wedge_masks(void) {
  init_wedge_master_masks();
  init_wedge_masks();
  init_smooth_interintra_masks();
}
637 
// Blends two d16 (CONV_BUF_TYPE) predictions into dst using the compound
// mask (wedge or difference-weighted) for this block.
static AOM_INLINE void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, InterPredParams *inter_pred_params) {
  // The mask is stored at the luma block size; chroma planes read it with
  // the plane's subsampling applied by the blend functions.
  const int ssy = inter_pred_params->subsampling_y;
  const int ssx = inter_pred_params->subsampling_x;
  const uint8_t *mask = av1_get_compound_type_mask(comp_data, sb_type);
  const int mask_stride = block_size_wide[sb_type];
#if CONFIG_AV1_HIGHBITDEPTH
  if (inter_pred_params->use_hbd_buf) {
    aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                  src1_stride, mask, mask_stride, w, h, ssx,
                                  ssy, &inter_pred_params->conv_params,
                                  inter_pred_params->bit_depth);
  } else {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, ssx, ssy,
                                 &inter_pred_params->conv_params);
  }
#else
  aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                               src1_stride, mask, mask_stride, w, h, ssx, ssy,
                               &inter_pred_params->conv_params);
#endif
}
664 
av1_make_masked_inter_predictor(const uint8_t * pre,int pre_stride,uint8_t * dst,int dst_stride,InterPredParams * inter_pred_params,const SubpelParams * subpel_params)665 void av1_make_masked_inter_predictor(const uint8_t *pre, int pre_stride,
666                                      uint8_t *dst, int dst_stride,
667                                      InterPredParams *inter_pred_params,
668                                      const SubpelParams *subpel_params) {
669   const INTERINTER_COMPOUND_DATA *comp_data = &inter_pred_params->mask_comp;
670   BLOCK_SIZE sb_type = inter_pred_params->sb_type;
671 
672   // We're going to call av1_make_inter_predictor to generate a prediction into
673   // a temporary buffer, then will blend that temporary buffer with that from
674   // the other reference.
675   DECLARE_ALIGNED(32, uint8_t, tmp_buf[2 * MAX_SB_SQUARE]);
676   uint8_t *tmp_dst =
677       inter_pred_params->use_hbd_buf ? CONVERT_TO_BYTEPTR(tmp_buf) : tmp_buf;
678 
679   const int tmp_buf_stride = MAX_SB_SIZE;
680   CONV_BUF_TYPE *org_dst = inter_pred_params->conv_params.dst;
681   int org_dst_stride = inter_pred_params->conv_params.dst_stride;
682   CONV_BUF_TYPE *tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
683   inter_pred_params->conv_params.dst = tmp_buf16;
684   inter_pred_params->conv_params.dst_stride = tmp_buf_stride;
685   assert(inter_pred_params->conv_params.do_average == 0);
686 
687   // This will generate a prediction in tmp_buf for the second reference
688   av1_make_inter_predictor(pre, pre_stride, tmp_dst, MAX_SB_SIZE,
689                            inter_pred_params, subpel_params);
690 
691   if (!inter_pred_params->conv_params.plane &&
692       comp_data->type == COMPOUND_DIFFWTD) {
693     av1_build_compound_diffwtd_mask_d16(
694         comp_data->seg_mask, comp_data->mask_type, org_dst, org_dst_stride,
695         tmp_buf16, tmp_buf_stride, inter_pred_params->block_height,
696         inter_pred_params->block_width, &inter_pred_params->conv_params,
697         inter_pred_params->bit_depth);
698   }
699   build_masked_compound_no_round(
700       dst, dst_stride, org_dst, org_dst_stride, tmp_buf16, tmp_buf_stride,
701       comp_data, sb_type, inter_pred_params->block_height,
702       inter_pred_params->block_width, inter_pred_params);
703 }
704 
av1_build_one_inter_predictor(uint8_t * dst,int dst_stride,const MV * const src_mv,InterPredParams * inter_pred_params,MACROBLOCKD * xd,int mi_x,int mi_y,int ref,CalcSubpelParamsFunc calc_subpel_params_func)705 void av1_build_one_inter_predictor(
706     uint8_t *dst, int dst_stride, const MV *const src_mv,
707     InterPredParams *inter_pred_params, MACROBLOCKD *xd, int mi_x, int mi_y,
708     int ref, CalcSubpelParamsFunc calc_subpel_params_func) {
709   SubpelParams subpel_params;
710   uint8_t *src;
711   int src_stride;
712   calc_subpel_params_func(src_mv, inter_pred_params, xd, mi_x, mi_y, ref, &src,
713                           &subpel_params, &src_stride);
714 
715   if (inter_pred_params->comp_mode == UNIFORM_SINGLE ||
716       inter_pred_params->comp_mode == UNIFORM_COMP) {
717     av1_make_inter_predictor(src, src_stride, dst, dst_stride,
718                              inter_pred_params, &subpel_params);
719   } else {
720     av1_make_masked_inter_predictor(src, src_stride, dst, dst_stride,
721                                     inter_pred_params, &subpel_params);
722   }
723 }
724 
725 // True if the following hold:
726 //  1. Not intrabc and not build_for_obmc
727 //  2. A U or V plane
728 //  3. If the block size differs from the base block size
729 //  4. If sub-sampled, none of the previous blocks around the sub-sample
730 //     are intrabc or inter-blocks
is_sub8x8_inter(const MACROBLOCKD * xd,int plane,BLOCK_SIZE bsize,int is_intrabc,int build_for_obmc)731 static bool is_sub8x8_inter(const MACROBLOCKD *xd, int plane, BLOCK_SIZE bsize,
732                             int is_intrabc, int build_for_obmc) {
733   if (is_intrabc || build_for_obmc) {
734     return false;
735   }
736 
737   const struct macroblockd_plane *const pd = &xd->plane[plane];
738   const int ss_x = pd->subsampling_x;
739   const int ss_y = pd->subsampling_y;
740   if ((block_size_wide[bsize] >= 8 || !ss_x) &&
741       (block_size_high[bsize] >= 8 || !ss_y)) {
742     return false;
743   }
744 
745   // For sub8x8 chroma blocks, we may be covering more than one luma block's
746   // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
747   // the top-left corner of the prediction source - the correct top-left corner
748   // is at (pre_x, pre_y).
749   const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
750   const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
751 
752   for (int row = row_start; row <= 0; ++row) {
753     for (int col = col_start; col <= 0; ++col) {
754       const MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
755       if (!is_inter_block(this_mbmi)) return false;
756       if (is_intrabc_block(this_mbmi)) return false;
757     }
758   }
759   return true;
760 }
761 
build_inter_predictors_sub8x8(const AV1_COMMON * cm,MACROBLOCKD * xd,int plane,const MB_MODE_INFO * mi,int bw,int bh,int mi_x,int mi_y,CalcSubpelParamsFunc calc_subpel_params_func)762 static void build_inter_predictors_sub8x8(
763     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
764     int bw, int bh, int mi_x, int mi_y,
765     CalcSubpelParamsFunc calc_subpel_params_func) {
766   const BLOCK_SIZE bsize = mi->sb_type;
767   struct macroblockd_plane *const pd = &xd->plane[plane];
768   const bool ss_x = pd->subsampling_x;
769   const bool ss_y = pd->subsampling_y;
770   const int b4_w = block_size_wide[bsize] >> ss_x;
771   const int b4_h = block_size_high[bsize] >> ss_y;
772   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ss_x, ss_y);
773   const int b8_w = block_size_wide[plane_bsize];
774   const int b8_h = block_size_high[plane_bsize];
775   const int is_compound = has_second_ref(mi);
776   assert(!is_compound);
777   assert(!is_intrabc_block(mi));
778 
779   // For sub8x8 chroma blocks, we may be covering more than one luma block's
780   // worth of pixels. Thus (mi_x, mi_y) may not be the correct coordinates for
781   // the top-left corner of the prediction source - the correct top-left corner
782   // is at (pre_x, pre_y).
783   const int row_start = (block_size_high[bsize] == 4) && ss_y ? -1 : 0;
784   const int col_start = (block_size_wide[bsize] == 4) && ss_x ? -1 : 0;
785   const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
786   const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
787 
788   int row = row_start;
789   for (int y = 0; y < b8_h; y += b4_h) {
790     int col = col_start;
791     for (int x = 0; x < b8_w; x += b4_w) {
792       MB_MODE_INFO *this_mbmi = xd->mi[row * xd->mi_stride + col];
793       int tmp_dst_stride = 8;
794       assert(bw < 8 || bh < 8);
795       (void)bw;
796       (void)bh;
797       struct buf_2d *const dst_buf = &pd->dst;
798       uint8_t *dst = dst_buf->buf + dst_buf->stride * y + x;
799       int ref = 0;
800       const RefCntBuffer *ref_buf =
801           get_ref_frame_buf(cm, this_mbmi->ref_frame[ref]);
802       const struct scale_factors *ref_scale_factors =
803           get_ref_scale_factors_const(cm, this_mbmi->ref_frame[ref]);
804       const struct scale_factors *const sf = ref_scale_factors;
805       const struct buf_2d pre_buf = {
806         NULL,
807         (plane == 1) ? ref_buf->buf.u_buffer : ref_buf->buf.v_buffer,
808         ref_buf->buf.uv_crop_width,
809         ref_buf->buf.uv_crop_height,
810         ref_buf->buf.uv_stride,
811       };
812 
813       const MV mv = this_mbmi->mv[ref].as_mv;
814 
815       InterPredParams inter_pred_params;
816       av1_init_inter_params(&inter_pred_params, b4_w, b4_h, pre_y + y,
817                             pre_x + x, pd->subsampling_x, pd->subsampling_y,
818                             xd->bd, is_cur_buf_hbd(xd), mi->use_intrabc, sf,
819                             &pre_buf, this_mbmi->interp_filters);
820       inter_pred_params.conv_params = get_conv_params_no_round(
821           ref, plane, xd->tmp_conv_dst, tmp_dst_stride, is_compound, xd->bd);
822       inter_pred_params.conv_params.use_dist_wtd_comp_avg = 0;
823 
824       av1_build_one_inter_predictor(dst, dst_buf->stride, &mv,
825                                     &inter_pred_params, xd, mi_x + x, mi_y + y,
826                                     ref, calc_subpel_params_func);
827 
828       ++col;
829     }
830     ++row;
831   }
832 }
833 
build_inter_predictors_8x8_and_bigger(const AV1_COMMON * cm,MACROBLOCKD * xd,int plane,const MB_MODE_INFO * mi,int build_for_obmc,int bw,int bh,int mi_x,int mi_y,CalcSubpelParamsFunc calc_subpel_params_func)834 static void build_inter_predictors_8x8_and_bigger(
835     const AV1_COMMON *cm, MACROBLOCKD *xd, int plane, const MB_MODE_INFO *mi,
836     int build_for_obmc, int bw, int bh, int mi_x, int mi_y,
837     CalcSubpelParamsFunc calc_subpel_params_func) {
838   const int is_compound = has_second_ref(mi);
839   const int is_intrabc = is_intrabc_block(mi);
840   assert(IMPLIES(is_intrabc, !is_compound));
841   struct macroblockd_plane *const pd = &xd->plane[plane];
842   struct buf_2d *const dst_buf = &pd->dst;
843   uint8_t *const dst = dst_buf->buf;
844 
845   int is_global[2] = { 0, 0 };
846   for (int ref = 0; ref < 1 + is_compound; ++ref) {
847     const WarpedMotionParams *const wm = &xd->global_motion[mi->ref_frame[ref]];
848     is_global[ref] = is_global_mv_block(mi, wm->wmtype);
849   }
850 
851   const BLOCK_SIZE bsize = mi->sb_type;
852   const int ss_x = pd->subsampling_x;
853   const int ss_y = pd->subsampling_y;
854   const int row_start =
855       (block_size_high[bsize] == 4) && ss_y && !build_for_obmc ? -1 : 0;
856   const int col_start =
857       (block_size_wide[bsize] == 4) && ss_x && !build_for_obmc ? -1 : 0;
858   const int pre_x = (mi_x + MI_SIZE * col_start) >> ss_x;
859   const int pre_y = (mi_y + MI_SIZE * row_start) >> ss_y;
860 
861   for (int ref = 0; ref < 1 + is_compound; ++ref) {
862     const struct scale_factors *const sf =
863         is_intrabc ? &cm->sf_identity : xd->block_ref_scale_factors[ref];
864     struct buf_2d *const pre_buf = is_intrabc ? dst_buf : &pd->pre[ref];
865     const MV mv = mi->mv[ref].as_mv;
866     const WarpTypesAllowed warp_types = { is_global[ref],
867                                           mi->motion_mode == WARPED_CAUSAL };
868 
869     InterPredParams inter_pred_params;
870     av1_init_inter_params(&inter_pred_params, bw, bh, pre_y, pre_x,
871                           pd->subsampling_x, pd->subsampling_y, xd->bd,
872                           is_cur_buf_hbd(xd), mi->use_intrabc, sf, pre_buf,
873                           mi->interp_filters);
874     if (is_compound) av1_init_comp_mode(&inter_pred_params);
875     inter_pred_params.conv_params = get_conv_params_no_round(
876         ref, plane, xd->tmp_conv_dst, MAX_SB_SIZE, is_compound, xd->bd);
877 
878     av1_dist_wtd_comp_weight_assign(
879         cm, mi, 0, &inter_pred_params.conv_params.fwd_offset,
880         &inter_pred_params.conv_params.bck_offset,
881         &inter_pred_params.conv_params.use_dist_wtd_comp_avg, is_compound);
882 
883     if (!build_for_obmc)
884       av1_init_warp_params(&inter_pred_params, &warp_types, ref, xd, mi);
885 
886     if (is_masked_compound_type(mi->interinter_comp.type)) {
887       av1_init_mask_comp(&inter_pred_params, mi->sb_type, &mi->interinter_comp);
888       // Assign physical buffer.
889       inter_pred_params.mask_comp.seg_mask = xd->seg_mask;
890     }
891 
892     av1_build_one_inter_predictor(dst, dst_buf->stride, &mv, &inter_pred_params,
893                                   xd, mi_x, mi_y, ref, calc_subpel_params_func);
894   }
895 }
896 
av1_build_inter_predictors(const AV1_COMMON * cm,MACROBLOCKD * xd,int plane,const MB_MODE_INFO * mi,int build_for_obmc,int bw,int bh,int mi_x,int mi_y,CalcSubpelParamsFunc calc_subpel_params_func)897 void av1_build_inter_predictors(const AV1_COMMON *cm, MACROBLOCKD *xd,
898                                 int plane, const MB_MODE_INFO *mi,
899                                 int build_for_obmc, int bw, int bh, int mi_x,
900                                 int mi_y,
901                                 CalcSubpelParamsFunc calc_subpel_params_func) {
902   if (is_sub8x8_inter(xd, plane, mi->sb_type, is_intrabc_block(mi),
903                       build_for_obmc)) {
904     build_inter_predictors_sub8x8(cm, xd, plane, mi, bw, bh, mi_x, mi_y,
905                                   calc_subpel_params_func);
906   } else {
907     build_inter_predictors_8x8_and_bigger(cm, xd, plane, mi, build_for_obmc, bw,
908                                           bh, mi_x, mi_y,
909                                           calc_subpel_params_func);
910   }
911 }
912 
av1_dist_wtd_comp_weight_assign(const AV1_COMMON * cm,const MB_MODE_INFO * mbmi,int order_idx,int * fwd_offset,int * bck_offset,int * use_dist_wtd_comp_avg,int is_compound)913 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
914                                      const MB_MODE_INFO *mbmi, int order_idx,
915                                      int *fwd_offset, int *bck_offset,
916                                      int *use_dist_wtd_comp_avg,
917                                      int is_compound) {
918   assert(fwd_offset != NULL && bck_offset != NULL);
919   if (!is_compound || mbmi->compound_idx) {
920     *use_dist_wtd_comp_avg = 0;
921     return;
922   }
923 
924   *use_dist_wtd_comp_avg = 1;
925   const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
926   const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
927   const int cur_frame_index = cm->cur_frame->order_hint;
928   int bck_frame_index = 0, fwd_frame_index = 0;
929 
930   if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
931   if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
932 
933   int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
934                                        fwd_frame_index, cur_frame_index)),
935                  0, MAX_FRAME_DISTANCE);
936   int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
937                                        cur_frame_index, bck_frame_index)),
938                  0, MAX_FRAME_DISTANCE);
939 
940   const int order = d0 <= d1;
941 
942   if (d0 == 0 || d1 == 0) {
943     *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
944     *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
945     return;
946   }
947 
948   int i;
949   for (i = 0; i < 3; ++i) {
950     int c0 = quant_dist_weight[i][order];
951     int c1 = quant_dist_weight[i][!order];
952     int d0_c0 = d0 * c0;
953     int d1_c1 = d1 * c1;
954     if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
955   }
956 
957   *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
958   *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
959 }
960 
av1_setup_dst_planes(struct macroblockd_plane * planes,BLOCK_SIZE bsize,const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const int plane_start,const int plane_end)961 void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
962                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
963                           const int plane_start, const int plane_end) {
964   // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
965   // the static analysis warnings.
966   for (int i = plane_start; i < AOMMIN(plane_end, MAX_MB_PLANE); ++i) {
967     struct macroblockd_plane *const pd = &planes[i];
968     const int is_uv = i > 0;
969     setup_pred_plane(&pd->dst, bsize, src->buffers[i], src->crop_widths[is_uv],
970                      src->crop_heights[is_uv], src->strides[is_uv], mi_row,
971                      mi_col, NULL, pd->subsampling_x, pd->subsampling_y);
972   }
973 }
974 
av1_setup_pre_planes(MACROBLOCKD * xd,int idx,const YV12_BUFFER_CONFIG * src,int mi_row,int mi_col,const struct scale_factors * sf,const int num_planes)975 void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
976                           const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
977                           const struct scale_factors *sf,
978                           const int num_planes) {
979   if (src != NULL) {
980     // We use AOMMIN(num_planes, MAX_MB_PLANE) instead of num_planes to quiet
981     // the static analysis warnings.
982     for (int i = 0; i < AOMMIN(num_planes, MAX_MB_PLANE); ++i) {
983       struct macroblockd_plane *const pd = &xd->plane[i];
984       const int is_uv = i > 0;
985       setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[i],
986                        src->crop_widths[is_uv], src->crop_heights[is_uv],
987                        src->strides[is_uv], mi_row, mi_col, sf,
988                        pd->subsampling_x, pd->subsampling_y);
989     }
990   }
991 }
992 
// obmc_mask_N[overlap_position]
// OBMC blending weights (out of 64) for an overlap region N pixels deep.
// Index 0 is the sample nearest the neighbor; weights increase toward 64
// (i.e. full weight on the current block's own prediction) as the distance
// from the block edge grows.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
1015 
av1_get_obmc_mask(int length)1016 const uint8_t *av1_get_obmc_mask(int length) {
1017   switch (length) {
1018     case 1: return obmc_mask_1;
1019     case 2: return obmc_mask_2;
1020     case 4: return obmc_mask_4;
1021     case 8: return obmc_mask_8;
1022     case 16: return obmc_mask_16;
1023     case 32: return obmc_mask_32;
1024     case 64: return obmc_mask_64;
1025     default: assert(0); return NULL;
1026   }
1027 }
1028 
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)1029 static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_row,
1030                                      int rel_mi_col, uint8_t op_mi_size,
1031                                      int dir, MB_MODE_INFO *mi, void *fun_ctxt,
1032                                      const int num_planes) {
1033   (void)xd;
1034   (void)rel_mi_row;
1035   (void)rel_mi_col;
1036   (void)op_mi_size;
1037   (void)dir;
1038   (void)mi;
1039   ++*(int *)fun_ctxt;
1040   (void)num_planes;
1041 }
1042 
av1_count_overlappable_neighbors(const AV1_COMMON * cm,MACROBLOCKD * xd)1043 void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd) {
1044   MB_MODE_INFO *mbmi = xd->mi[0];
1045 
1046   mbmi->overlappable_neighbors[0] = 0;
1047   mbmi->overlappable_neighbors[1] = 0;
1048 
1049   if (!is_motion_variation_allowed_bsize(mbmi->sb_type)) return;
1050 
1051   foreach_overlappable_nb_above(cm, xd, INT_MAX, increment_int_ptr,
1052                                 &mbmi->overlappable_neighbors[0]);
1053   foreach_overlappable_nb_left(cm, xd, INT_MAX, increment_int_ptr,
1054                                &mbmi->overlappable_neighbors[1]);
1055 }
1056 
1057 // HW does not support < 4x4 prediction. To limit the bandwidth requirement, if
1058 // block-size of current plane is smaller than 8x8, always only blend with the
1059 // left neighbor(s) (skip blending with the above side).
1060 #define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
1061 
av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,const struct macroblockd_plane * pd,int dir)1062 int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
1063                                const struct macroblockd_plane *pd, int dir) {
1064   assert(is_motion_variation_allowed_bsize(bsize));
1065 
1066   const BLOCK_SIZE bsize_plane =
1067       get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
1068   switch (bsize_plane) {
1069 #if DISABLE_CHROMA_U8X8_OBMC
1070     case BLOCK_4X4:
1071     case BLOCK_8X4:
1072     case BLOCK_4X8: return 1; break;
1073 #else
1074     case BLOCK_4X4:
1075     case BLOCK_8X4:
1076     case BLOCK_4X8: return dir == 0; break;
1077 #endif
1078     default: return 0;
1079   }
1080 }
1081 
av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO * mbmi)1082 void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
1083   mbmi->ref_frame[1] = NONE_FRAME;
1084   mbmi->interinter_comp.type = COMPOUND_AVERAGE;
1085 
1086   return;
1087 }
1088 
// Context threaded through the OBMC blending visitors: per-plane pointers to
// the neighbor predictions ('adjacent') and their row strides.
struct obmc_inter_pred_ctxt {
  uint8_t **adjacent;
  int *adjacent_stride;
};
1093 
build_obmc_inter_pred_above(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * above_mi,void * fun_ctxt,const int num_planes)1094 static INLINE void build_obmc_inter_pred_above(
1095     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1096     int dir, MB_MODE_INFO *above_mi, void *fun_ctxt, const int num_planes) {
1097   (void)above_mi;
1098   (void)rel_mi_row;
1099   (void)dir;
1100   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1101   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
1102   const int overlap =
1103       AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
1104 
1105   for (int plane = 0; plane < num_planes; ++plane) {
1106     const struct macroblockd_plane *pd = &xd->plane[plane];
1107     const int bw = (op_mi_size * MI_SIZE) >> pd->subsampling_x;
1108     const int bh = overlap >> pd->subsampling_y;
1109     const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
1110 
1111     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
1112 
1113     const int dst_stride = pd->dst.stride;
1114     uint8_t *const dst = &pd->dst.buf[plane_col];
1115     const int tmp_stride = ctxt->adjacent_stride[plane];
1116     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
1117     const uint8_t *const mask = av1_get_obmc_mask(bh);
1118 #if CONFIG_AV1_HIGHBITDEPTH
1119     const int is_hbd = is_cur_buf_hbd(xd);
1120     if (is_hbd)
1121       aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
1122                                  tmp_stride, mask, bw, bh, xd->bd);
1123     else
1124       aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1125                           mask, bw, bh);
1126 #else
1127     aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1128                         bw, bh);
1129 #endif
1130   }
1131 }
1132 
build_obmc_inter_pred_left(MACROBLOCKD * xd,int rel_mi_row,int rel_mi_col,uint8_t op_mi_size,int dir,MB_MODE_INFO * left_mi,void * fun_ctxt,const int num_planes)1133 static INLINE void build_obmc_inter_pred_left(
1134     MACROBLOCKD *xd, int rel_mi_row, int rel_mi_col, uint8_t op_mi_size,
1135     int dir, MB_MODE_INFO *left_mi, void *fun_ctxt, const int num_planes) {
1136   (void)left_mi;
1137   (void)rel_mi_col;
1138   (void)dir;
1139   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
1140   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
1141   const int overlap =
1142       AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
1143 
1144   for (int plane = 0; plane < num_planes; ++plane) {
1145     const struct macroblockd_plane *pd = &xd->plane[plane];
1146     const int bw = overlap >> pd->subsampling_x;
1147     const int bh = (op_mi_size * MI_SIZE) >> pd->subsampling_y;
1148     const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
1149 
1150     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
1151 
1152     const int dst_stride = pd->dst.stride;
1153     uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
1154     const int tmp_stride = ctxt->adjacent_stride[plane];
1155     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
1156     const uint8_t *const mask = av1_get_obmc_mask(bw);
1157 
1158 #if CONFIG_AV1_HIGHBITDEPTH
1159     const int is_hbd = is_cur_buf_hbd(xd);
1160     if (is_hbd)
1161       aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
1162                                  tmp_stride, mask, bw, bh, xd->bd);
1163     else
1164       aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
1165                           mask, bw, bh);
1166 #else
1167     aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride, mask,
1168                         bw, bh);
1169 #endif
1170   }
1171 }
1172 
1173 // This function combines motion compensated predictions that are generated by
1174 // top/left neighboring blocks' inter predictors with the regular inter
1175 // prediction. We assume the original prediction (bmc) is stored in
1176 // xd->plane[].dst.buf
av1_build_obmc_inter_prediction(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * above[MAX_MB_PLANE],int above_stride[MAX_MB_PLANE],uint8_t * left[MAX_MB_PLANE],int left_stride[MAX_MB_PLANE])1177 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
1178                                      uint8_t *above[MAX_MB_PLANE],
1179                                      int above_stride[MAX_MB_PLANE],
1180                                      uint8_t *left[MAX_MB_PLANE],
1181                                      int left_stride[MAX_MB_PLANE]) {
1182   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
1183 
1184   // handle above row
1185   struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
1186   foreach_overlappable_nb_above(cm, xd,
1187                                 max_neighbor_obmc[mi_size_wide_log2[bsize]],
1188                                 build_obmc_inter_pred_above, &ctxt_above);
1189 
1190   // handle left column
1191   struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
1192   foreach_overlappable_nb_left(cm, xd,
1193                                max_neighbor_obmc[mi_size_high_log2[bsize]],
1194                                build_obmc_inter_pred_left, &ctxt_left);
1195 }
1196 
av1_setup_address_for_obmc(MACROBLOCKD * xd,int mi_row_offset,int mi_col_offset,MB_MODE_INFO * ref_mbmi,struct build_prediction_ctxt * ctxt,const int num_planes)1197 void av1_setup_address_for_obmc(MACROBLOCKD *xd, int mi_row_offset,
1198                                 int mi_col_offset, MB_MODE_INFO *ref_mbmi,
1199                                 struct build_prediction_ctxt *ctxt,
1200                                 const int num_planes) {
1201   const BLOCK_SIZE ref_bsize = AOMMAX(BLOCK_8X8, ref_mbmi->sb_type);
1202   const int ref_mi_row = xd->mi_row + mi_row_offset;
1203   const int ref_mi_col = xd->mi_col + mi_col_offset;
1204 
1205   for (int plane = 0; plane < num_planes; ++plane) {
1206     struct macroblockd_plane *const pd = &xd->plane[plane];
1207     setup_pred_plane(&pd->dst, ref_bsize, ctxt->tmp_buf[plane],
1208                      ctxt->tmp_width[plane], ctxt->tmp_height[plane],
1209                      ctxt->tmp_stride[plane], mi_row_offset, mi_col_offset,
1210                      NULL, pd->subsampling_x, pd->subsampling_y);
1211   }
1212 
1213   const MV_REFERENCE_FRAME frame = ref_mbmi->ref_frame[0];
1214 
1215   const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1216   const struct scale_factors *const sf =
1217       get_ref_scale_factors_const(ctxt->cm, frame);
1218 
1219   xd->block_ref_scale_factors[0] = sf;
1220   if ((!av1_is_valid_scale(sf)))
1221     aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1222                        "Reference frame has invalid dimensions");
1223 
1224   av1_setup_pre_planes(xd, 0, &ref_buf->buf, ref_mi_row, ref_mi_col, sf,
1225                        num_planes);
1226 }
1227 
av1_setup_build_prediction_by_above_pred(MACROBLOCKD * xd,int rel_mi_col,uint8_t above_mi_width,MB_MODE_INFO * above_mbmi,struct build_prediction_ctxt * ctxt,const int num_planes)1228 void av1_setup_build_prediction_by_above_pred(
1229     MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
1230     MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
1231     const int num_planes) {
1232   const BLOCK_SIZE a_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
1233   const int above_mi_col = xd->mi_col + rel_mi_col;
1234 
1235   av1_modify_neighbor_predictor_for_obmc(above_mbmi);
1236 
1237   for (int j = 0; j < num_planes; ++j) {
1238     struct macroblockd_plane *const pd = &xd->plane[j];
1239     setup_pred_plane(&pd->dst, a_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1240                      ctxt->tmp_height[j], ctxt->tmp_stride[j], 0, rel_mi_col,
1241                      NULL, pd->subsampling_x, pd->subsampling_y);
1242   }
1243 
1244   const int num_refs = 1 + has_second_ref(above_mbmi);
1245 
1246   for (int ref = 0; ref < num_refs; ++ref) {
1247     const MV_REFERENCE_FRAME frame = above_mbmi->ref_frame[ref];
1248 
1249     const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1250     const struct scale_factors *const sf =
1251         get_ref_scale_factors_const(ctxt->cm, frame);
1252     xd->block_ref_scale_factors[ref] = sf;
1253     if ((!av1_is_valid_scale(sf)))
1254       aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1255                          "Reference frame has invalid dimensions");
1256     av1_setup_pre_planes(xd, ref, &ref_buf->buf, xd->mi_row, above_mi_col, sf,
1257                          num_planes);
1258   }
1259 
1260   xd->mb_to_left_edge = 8 * MI_SIZE * (-above_mi_col);
1261   xd->mb_to_right_edge =
1262       ctxt->mb_to_far_edge +
1263       (xd->width - rel_mi_col - above_mi_width) * MI_SIZE * 8;
1264 }
1265 
av1_setup_build_prediction_by_left_pred(MACROBLOCKD * xd,int rel_mi_row,uint8_t left_mi_height,MB_MODE_INFO * left_mbmi,struct build_prediction_ctxt * ctxt,const int num_planes)1266 void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
1267                                              uint8_t left_mi_height,
1268                                              MB_MODE_INFO *left_mbmi,
1269                                              struct build_prediction_ctxt *ctxt,
1270                                              const int num_planes) {
1271   const BLOCK_SIZE l_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
1272   const int left_mi_row = xd->mi_row + rel_mi_row;
1273 
1274   av1_modify_neighbor_predictor_for_obmc(left_mbmi);
1275 
1276   for (int j = 0; j < num_planes; ++j) {
1277     struct macroblockd_plane *const pd = &xd->plane[j];
1278     setup_pred_plane(&pd->dst, l_bsize, ctxt->tmp_buf[j], ctxt->tmp_width[j],
1279                      ctxt->tmp_height[j], ctxt->tmp_stride[j], rel_mi_row, 0,
1280                      NULL, pd->subsampling_x, pd->subsampling_y);
1281   }
1282 
1283   const int num_refs = 1 + has_second_ref(left_mbmi);
1284 
1285   for (int ref = 0; ref < num_refs; ++ref) {
1286     const MV_REFERENCE_FRAME frame = left_mbmi->ref_frame[ref];
1287 
1288     const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, frame);
1289     const struct scale_factors *const ref_scale_factors =
1290         get_ref_scale_factors_const(ctxt->cm, frame);
1291 
1292     xd->block_ref_scale_factors[ref] = ref_scale_factors;
1293     if ((!av1_is_valid_scale(ref_scale_factors)))
1294       aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
1295                          "Reference frame has invalid dimensions");
1296     av1_setup_pre_planes(xd, ref, &ref_buf->buf, left_mi_row, xd->mi_col,
1297                          ref_scale_factors, num_planes);
1298   }
1299 
1300   xd->mb_to_top_edge = GET_MV_SUBPEL(MI_SIZE * (-left_mi_row));
1301   xd->mb_to_bottom_edge =
1302       ctxt->mb_to_far_edge +
1303       GET_MV_SUBPEL((xd->height - rel_mi_row - left_mi_height) * MI_SIZE);
1304 }
1305 
combine_interintra(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred,int compstride,const uint8_t * interpred,int interstride,const uint8_t * intrapred,int intrastride)1306 static AOM_INLINE void combine_interintra(
1307     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1308     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1309     uint8_t *comppred, int compstride, const uint8_t *interpred,
1310     int interstride, const uint8_t *intrapred, int intrastride) {
1311   const int bw = block_size_wide[plane_bsize];
1312   const int bh = block_size_high[plane_bsize];
1313 
1314   if (use_wedge_interintra) {
1315     if (av1_is_wedge_used(bsize)) {
1316       const uint8_t *mask =
1317           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1318       const int subw = 2 * mi_size_wide[bsize] == bw;
1319       const int subh = 2 * mi_size_high[bsize] == bh;
1320       aom_blend_a64_mask(comppred, compstride, intrapred, intrastride,
1321                          interpred, interstride, mask, block_size_wide[bsize],
1322                          bw, bh, subw, subh);
1323     }
1324     return;
1325   }
1326 
1327   const uint8_t *mask = smooth_interintra_mask_buf[mode][plane_bsize];
1328   aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
1329                      interstride, mask, bw, bw, bh, 0, 0);
1330 }
1331 
1332 #if CONFIG_AV1_HIGHBITDEPTH
combine_interintra_highbd(INTERINTRA_MODE mode,int8_t use_wedge_interintra,int8_t wedge_index,int8_t wedge_sign,BLOCK_SIZE bsize,BLOCK_SIZE plane_bsize,uint8_t * comppred8,int compstride,const uint8_t * interpred8,int interstride,const uint8_t * intrapred8,int intrastride,int bd)1333 static AOM_INLINE void combine_interintra_highbd(
1334     INTERINTRA_MODE mode, int8_t use_wedge_interintra, int8_t wedge_index,
1335     int8_t wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
1336     uint8_t *comppred8, int compstride, const uint8_t *interpred8,
1337     int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
1338   const int bw = block_size_wide[plane_bsize];
1339   const int bh = block_size_high[plane_bsize];
1340 
1341   if (use_wedge_interintra) {
1342     if (av1_is_wedge_used(bsize)) {
1343       const uint8_t *mask =
1344           av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
1345       const int subh = 2 * mi_size_high[bsize] == bh;
1346       const int subw = 2 * mi_size_wide[bsize] == bw;
1347       aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1348                                 interpred8, interstride, mask,
1349                                 block_size_wide[bsize], bw, bh, subw, subh, bd);
1350     }
1351     return;
1352   }
1353 
1354   uint8_t mask[MAX_SB_SQUARE];
1355   build_smooth_interintra_mask(mask, bw, plane_bsize, mode);
1356   aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
1357                             interpred8, interstride, mask, bw, bw, bh, 0, 0,
1358                             bd);
1359 }
1360 #endif
1361 
av1_build_intra_predictors_for_interintra(const AV1_COMMON * cm,MACROBLOCKD * xd,BLOCK_SIZE bsize,int plane,const BUFFER_SET * ctx,uint8_t * dst,int dst_stride)1362 void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
1363                                                MACROBLOCKD *xd,
1364                                                BLOCK_SIZE bsize, int plane,
1365                                                const BUFFER_SET *ctx,
1366                                                uint8_t *dst, int dst_stride) {
1367   struct macroblockd_plane *const pd = &xd->plane[plane];
1368   const int ssx = xd->plane[plane].subsampling_x;
1369   const int ssy = xd->plane[plane].subsampling_y;
1370   BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1371   PREDICTION_MODE mode = interintra_to_intra_mode[xd->mi[0]->interintra_mode];
1372   assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
1373   assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
1374   assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
1375   assert(xd->mi[0]->use_intrabc == 0);
1376 
1377   av1_predict_intra_block(cm, xd, pd->width, pd->height,
1378                           max_txsize_rect_lookup[plane_bsize], mode, 0, 0,
1379                           FILTER_INTRA_MODES, ctx->plane[plane],
1380                           ctx->stride[plane], dst, dst_stride, 0, 0, plane);
1381 }
1382 
av1_combine_interintra(MACROBLOCKD * xd,BLOCK_SIZE bsize,int plane,const uint8_t * inter_pred,int inter_stride,const uint8_t * intra_pred,int intra_stride)1383 void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
1384                             const uint8_t *inter_pred, int inter_stride,
1385                             const uint8_t *intra_pred, int intra_stride) {
1386   const int ssx = xd->plane[plane].subsampling_x;
1387   const int ssy = xd->plane[plane].subsampling_y;
1388   const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, ssx, ssy);
1389 #if CONFIG_AV1_HIGHBITDEPTH
1390   if (is_cur_buf_hbd(xd)) {
1391     combine_interintra_highbd(
1392         xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1393         xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1394         plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1395         inter_pred, inter_stride, intra_pred, intra_stride, xd->bd);
1396     return;
1397   }
1398 #endif
1399   combine_interintra(
1400       xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
1401       xd->mi[0]->interintra_wedge_index, INTERINTRA_WEDGE_SIGN, bsize,
1402       plane_bsize, xd->plane[plane].dst.buf, xd->plane[plane].dst.stride,
1403       inter_pred, inter_stride, intra_pred, intra_stride);
1404 }
1405 
1406 // build interintra_predictors for one plane
av1_build_interintra_predictor(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * pred,int stride,const BUFFER_SET * ctx,int plane,BLOCK_SIZE bsize)1407 void av1_build_interintra_predictor(const AV1_COMMON *cm, MACROBLOCKD *xd,
1408                                     uint8_t *pred, int stride,
1409                                     const BUFFER_SET *ctx, int plane,
1410                                     BLOCK_SIZE bsize) {
1411   assert(bsize < BLOCK_SIZES_ALL);
1412   if (is_cur_buf_hbd(xd)) {
1413     DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1414     av1_build_intra_predictors_for_interintra(
1415         cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1416         MAX_SB_SIZE);
1417     av1_combine_interintra(xd, bsize, plane, pred, stride,
1418                            CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1419   } else {
1420     DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1421     av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1422                                               intrapredictor, MAX_SB_SIZE);
1423     av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1424                            MAX_SB_SIZE);
1425   }
1426 }
1427