1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <stdio.h>
14 #include <limits.h>
15 
16 #include "config/aom_config.h"
17 #include "config/aom_dsp_rtcd.h"
18 #include "config/aom_scale_rtcd.h"
19 
20 #include "aom/aom_integer.h"
21 #include "aom_dsp/blend.h"
22 
23 #include "av1/common/blockd.h"
24 #include "av1/common/mvref_common.h"
25 #include "av1/common/reconinter.h"
26 #include "av1/common/reconintra.h"
27 #include "av1/common/onyxc_int.h"
28 #include "av1/common/obmc.h"
29 
30 #define USE_PRECOMPUTED_WEDGE_MASK 1
31 #define USE_PRECOMPUTED_WEDGE_SIGN 1
32 
33 // This function will determine whether or not to create a warped
34 // prediction.
av1_allow_warp(const MB_MODE_INFO * const mbmi,const WarpTypesAllowed * const warp_types,const WarpedMotionParams * const gm_params,int build_for_obmc,const struct scale_factors * const sf,WarpedMotionParams * final_warp_params)35 int av1_allow_warp(const MB_MODE_INFO *const mbmi,
36                    const WarpTypesAllowed *const warp_types,
37                    const WarpedMotionParams *const gm_params,
38                    int build_for_obmc, const struct scale_factors *const sf,
39                    WarpedMotionParams *final_warp_params) {
40   // Note: As per the spec, we must test the fixed point scales here, which are
41   // at a higher precision (1 << 14) than the xs and ys in subpel_params (that
42   // have 1 << 10 precision).
43   if (av1_is_scaled(sf)) return 0;
44 
45   if (final_warp_params != NULL) *final_warp_params = default_warp_params;
46 
47   if (build_for_obmc) return 0;
48 
49   if (warp_types->local_warp_allowed && !mbmi->wm_params.invalid) {
50     if (final_warp_params != NULL)
51       memcpy(final_warp_params, &mbmi->wm_params, sizeof(*final_warp_params));
52     return 1;
53   } else if (warp_types->global_warp_allowed && !gm_params->invalid) {
54     if (final_warp_params != NULL)
55       memcpy(final_warp_params, gm_params, sizeof(*final_warp_params));
56     return 1;
57   }
58 
59   return 0;
60 }
61 
// Builds one w x h inter prediction for reference `ref` of plane `plane`.
//
// Exactly one of three back ends is used:
//   - av1_warp_plane(), when the block is at least 8x8, av1_allow_warp()
//     accepts a warp model and xd->cur_frame_force_integer_mv is 0. This path
//     reads from xd->plane[plane].pre[ref] and ignores src / subpel_params.
//   - highbd_inter_predictor(), when is_cur_buf_hbd(xd) is true.
//   - inter_predictor(), otherwise.
// In debug builds it also asserts that a compound prediction has a
// conv_params->dst buffer and that intra block copy never selects warping.
void av1_make_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst,
                              int dst_stride, const SubpelParams *subpel_params,
                              const struct scale_factors *sf, int w, int h,
                              ConvolveParams *conv_params,
                              InterpFilters interp_filters,
                              const WarpTypesAllowed *warp_types, int p_col,
                              int p_row, int plane, int ref,
                              const MB_MODE_INFO *mi, int build_for_obmc,
                              const MACROBLOCKD *xd, int can_use_previous) {
  // Make sure the selected motion mode is valid for this configuration
  assert_motion_mode_valid(mi->motion_mode, xd->global_motion, xd, mi,
                           can_use_previous);
  assert(IMPLIES(conv_params->is_compound, conv_params->dst != NULL));

  const int is_intrabc = mi->use_intrabc;

  // Warping is only considered for blocks of at least 8x8 samples.
  WarpedMotionParams final_warp_params;
  int do_warp = 0;
  if (w >= 8 && h >= 8) {
    do_warp =
        av1_allow_warp(mi, warp_types, &xd->global_motion[mi->ref_frame[ref]],
                       build_for_obmc, sf, &final_warp_params) != 0;
  }
  assert(IMPLIES(is_intrabc, !do_warp));

  if (do_warp && xd->cur_frame_force_integer_mv == 0) {
    const struct macroblockd_plane *const pd = &xd->plane[plane];
    const struct buf_2d *const pre_buf = &pd->pre[ref];
    av1_warp_plane(&final_warp_params, is_cur_buf_hbd(xd), xd->bd,
                   pre_buf->buf0, pre_buf->width, pre_buf->height,
                   pre_buf->stride, dst, p_col, p_row, w, h, dst_stride,
                   pd->subsampling_x, pd->subsampling_y, conv_params);
    return;
  }

  if (is_cur_buf_hbd(xd)) {
    highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf,
                           w, h, conv_params, interp_filters, is_intrabc,
                           xd->bd);
    return;
  }

  inter_predictor(src, src_stride, dst, dst_stride, subpel_params, sf, w, h,
                  conv_params, interp_filters, is_intrabc);
}
100 
101 #if USE_PRECOMPUTED_WEDGE_MASK
102 static const uint8_t wedge_master_oblique_odd[MASK_MASTER_SIZE] = {
103   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
104   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  2,  6,  18,
105   37, 53, 60, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
106   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
107 };
108 static const uint8_t wedge_master_oblique_even[MASK_MASTER_SIZE] = {
109   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
110   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  4,  11, 27,
111   46, 58, 62, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
112   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
113 };
114 static const uint8_t wedge_master_vertical[MASK_MASTER_SIZE] = {
115   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
116   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  2,  7,  21,
117   43, 57, 62, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
118   64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
119 };
120 
// Copies a `width`-byte row from src into dst displaced by `shift` positions,
// filling the vacated end with the nearest edge sample of src:
//   shift >= 0: dst[shift .. width) = src[0 .. width - shift),
//               dst[0 .. shift)     = src[0]
//   shift <  0: dst[0 .. width - |shift|) = src[|shift| .. width),
//               dst[width - |shift| .. width) = src[width - 1]
// Nothing here checks the arguments; the caller must keep |shift| <= width.
static void shift_copy(const uint8_t *src, uint8_t *dst, int shift, int width) {
  if (shift < 0) {
    // Shift towards the start: drop the first `lead` samples, pad the tail.
    const int lead = -shift;
    const int kept = width - lead;
    memcpy(dst, src + lead, kept);
    memset(dst + kept, src[width - 1], lead);
    return;
  }

  // Shift towards the end: pad the head, keep the first `kept` samples.
  const int kept = width - shift;
  memcpy(dst + shift, src, kept);
  memset(dst, src[0], shift);
}
131 #endif  // USE_PRECOMPUTED_WEDGE_MASK
132 
133 #if USE_PRECOMPUTED_WEDGE_SIGN
134 /* clang-format off */
135 DECLARE_ALIGNED(16, static uint8_t,
136                 wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]) = {
137   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
138   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
139   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
140   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
141   { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
142   { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
143   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
144   { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
145   { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
146   { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, },
147   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
148   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
149   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
150   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
151   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
152   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
153   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
154   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
155   { 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, },
156   { 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, },
157   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
158   { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, },  // not used
159 };
160 /* clang-format on */
161 #else
162 DECLARE_ALIGNED(16, static uint8_t,
163                 wedge_signflip_lookup[BLOCK_SIZES_ALL][MAX_WEDGE_TYPES]);
164 #endif  // USE_PRECOMPUTED_WEDGE_SIGN
165 
166 // [negative][direction]
167 DECLARE_ALIGNED(
168     16, static uint8_t,
169     wedge_mask_obl[2][WEDGE_DIRECTIONS][MASK_MASTER_SIZE * MASK_MASTER_SIZE]);
170 
171 // 4 * MAX_WEDGE_SQUARE is an easy to compute and fairly tight upper bound
172 // on the sum of all mask sizes up to an including MAX_WEDGE_SQUARE.
173 DECLARE_ALIGNED(16, static uint8_t,
174                 wedge_mask_buf[2 * MAX_WEDGE_TYPES * 4 * MAX_WEDGE_SQUARE]);
175 
176 static wedge_masks_type wedge_masks[BLOCK_SIZES_ALL][2];
177 
178 static const wedge_code_type wedge_codebook_16_hgtw[16] = {
179   { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
180   { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
181   { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 4 },
182   { WEDGE_HORIZONTAL, 4, 6 }, { WEDGE_VERTICAL, 4, 4 },
183   { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
184   { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
185   { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
186   { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
187 };
188 
189 static const wedge_code_type wedge_codebook_16_hltw[16] = {
190   { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
191   { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
192   { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 4, 4 },
193   { WEDGE_VERTICAL, 6, 4 },   { WEDGE_HORIZONTAL, 4, 4 },
194   { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
195   { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
196   { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
197   { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
198 };
199 
200 static const wedge_code_type wedge_codebook_16_heqw[16] = {
201   { WEDGE_OBLIQUE27, 4, 4 },  { WEDGE_OBLIQUE63, 4, 4 },
202   { WEDGE_OBLIQUE117, 4, 4 }, { WEDGE_OBLIQUE153, 4, 4 },
203   { WEDGE_HORIZONTAL, 4, 2 }, { WEDGE_HORIZONTAL, 4, 6 },
204   { WEDGE_VERTICAL, 2, 4 },   { WEDGE_VERTICAL, 6, 4 },
205   { WEDGE_OBLIQUE27, 4, 2 },  { WEDGE_OBLIQUE27, 4, 6 },
206   { WEDGE_OBLIQUE153, 4, 2 }, { WEDGE_OBLIQUE153, 4, 6 },
207   { WEDGE_OBLIQUE63, 2, 4 },  { WEDGE_OBLIQUE63, 6, 4 },
208   { WEDGE_OBLIQUE117, 2, 4 }, { WEDGE_OBLIQUE117, 6, 4 },
209 };
210 
211 const wedge_params_type wedge_params_lookup[BLOCK_SIZES_ALL] = {
212   { 0, NULL, NULL, NULL },
213   { 0, NULL, NULL, NULL },
214   { 0, NULL, NULL, NULL },
215   { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_8X8],
216     wedge_masks[BLOCK_8X8] },
217   { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X16],
218     wedge_masks[BLOCK_8X16] },
219   { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_16X8],
220     wedge_masks[BLOCK_16X8] },
221   { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_16X16],
222     wedge_masks[BLOCK_16X16] },
223   { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_16X32],
224     wedge_masks[BLOCK_16X32] },
225   { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X16],
226     wedge_masks[BLOCK_32X16] },
227   { 4, wedge_codebook_16_heqw, wedge_signflip_lookup[BLOCK_32X32],
228     wedge_masks[BLOCK_32X32] },
229   { 0, NULL, NULL, NULL },
230   { 0, NULL, NULL, NULL },
231   { 0, NULL, NULL, NULL },
232   { 0, NULL, NULL, NULL },
233   { 0, NULL, NULL, NULL },
234   { 0, NULL, NULL, NULL },
235   { 0, NULL, NULL, NULL },
236   { 0, NULL, NULL, NULL },
237   { 4, wedge_codebook_16_hgtw, wedge_signflip_lookup[BLOCK_8X32],
238     wedge_masks[BLOCK_8X32] },
239   { 4, wedge_codebook_16_hltw, wedge_signflip_lookup[BLOCK_32X8],
240     wedge_masks[BLOCK_32X8] },
241   { 0, NULL, NULL, NULL },
242   { 0, NULL, NULL, NULL },
243 };
244 
// Returns a pointer into the master mask plane (row pitch MASK_MASTER_STRIDE)
// positioned so that the wedge boundary of codebook entry `wedge_index` lands
// at that entry's offset inside a block of size `sb_type`.
//
//   wedge_index  index into the block size's codebook; must be in
//                [0, 1 << get_wedge_bits_lookup(sb_type)).
//   neg          requested polarity; XORed with the block size's signflip
//                entry to pick wedge_mask_obl[0] or wedge_mask_obl[1].
//   sb_type      block size; must be one that has a wedge codebook.
//
// The preconditions are asserted before any table is indexed, so a bad
// argument trips the assert in debug builds instead of first reading through
// an out-of-range index or a NULL codebook/signflip pointer.
static const uint8_t *get_wedge_mask_inplace(int wedge_index, int neg,
                                             BLOCK_SIZE sb_type) {
  const wedge_params_type *const params = &wedge_params_lookup[sb_type];

  assert(wedge_index >= 0 &&
         wedge_index < (1 << get_wedge_bits_lookup(sb_type)));
  assert(params->codebook != NULL && params->signflip != NULL);

  const wedge_code_type *const code = params->codebook + wedge_index;
  const uint8_t wsignflip = params->signflip[wedge_index];
  const int bh = block_size_high[sb_type];
  const int bw = block_size_wide[sb_type];

  // Codebook offsets are expressed in eighths of the block dimension.
  const int woff = (code->x_offset * bw) >> 3;
  const int hoff = (code->y_offset * bh) >> 3;

  // Start from the centre of the master plane and back off by the offsets.
  return wedge_mask_obl[neg ^ wsignflip][code->direction] +
         MASK_MASTER_STRIDE * (MASK_MASTER_SIZE / 2 - hoff) +
         MASK_MASTER_SIZE / 2 - woff;
}
264 
// Returns the blend mask for a masked compound prediction:
//   COMPOUND_WEDGE   -> the precomputed wedge mask for (wedge_index,
//                       wedge_sign) at block size sb_type;
//   COMPOUND_DIFFWTD -> comp_data->seg_mask.
// Any other type trips an assert in debug builds and yields NULL.
const uint8_t *av1_get_compound_type_mask(
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type) {
  assert(is_masked_compound_type(comp_data->type));
  (void)sb_type;

  if (comp_data->type == COMPOUND_WEDGE) {
    return av1_get_contiguous_soft_mask(comp_data->wedge_index,
                                        comp_data->wedge_sign, sb_type);
  }
  if (comp_data->type == COMPOUND_DIFFWTD) {
    return comp_data->seg_mask;
  }

  assert(0);
  return NULL;
}
277 
// Builds a difference-weighted mask from two intermediate-precision
// (CONV_BUF_TYPE) predictions.
//
// For every sample the absolute difference is rounded down to pixel precision
// by `round` bits (derived from the convolve rounding and the bit depth),
// divided by DIFF_FACTOR, added to mask_base and clamped to
// [0, AOM_BLEND_A64_MAX_ALPHA]. With which_inverse set, the stored value is
// AOM_BLEND_A64_MAX_ALPHA minus that result. The mask is written densely with
// a row pitch of w.
static void diffwtd_mask_d16(uint8_t *mask, int which_inverse, int mask_base,
                             const CONV_BUF_TYPE *src0, int src0_stride,
                             const CONV_BUF_TYPE *src1, int src1_stride, int h,
                             int w, ConvolveParams *conv_params, int bd) {
  const int round =
      2 * FILTER_BITS - conv_params->round_0 - conv_params->round_1 + (bd - 8);

  for (int row = 0; row < h; ++row) {
    const int off0 = row * src0_stride;
    const int off1 = row * src1_stride;
    const int off_mask = row * w;
    for (int col = 0; col < w; ++col) {
      int diff = abs(src0[off0 + col] - src1[off1 + col]);
      diff = ROUND_POWER_OF_TWO(diff, round);
      const int m =
          clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
      mask[off_mask + col] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
    }
  }
}
294 
// C reference for building a difference-weighted compound mask from two
// CONV_BUF_TYPE predictions. DIFFWTD_38 writes the mask with base 38 and
// DIFFWTD_38_INV writes its complement; any other mask_type asserts in debug
// builds and leaves `mask` untouched.
void av1_build_compound_diffwtd_mask_d16_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const CONV_BUF_TYPE *src0,
    int src0_stride, const CONV_BUF_TYPE *src1, int src1_stride, int h, int w,
    ConvolveParams *conv_params, int bd) {
  const int is_plain = (mask_type == DIFFWTD_38);
  const int is_inverse = (mask_type == DIFFWTD_38_INV);

  if (!is_plain && !is_inverse) {
    assert(0);
    return;
  }

  diffwtd_mask_d16(mask, is_inverse, 38, src0, src0_stride, src1, src1_stride,
                   h, w, conv_params, bd);
}
311 
diffwtd_mask(uint8_t * mask,int which_inverse,int mask_base,const uint8_t * src0,int src0_stride,const uint8_t * src1,int src1_stride,int h,int w)312 static void diffwtd_mask(uint8_t *mask, int which_inverse, int mask_base,
313                          const uint8_t *src0, int src0_stride,
314                          const uint8_t *src1, int src1_stride, int h, int w) {
315   int i, j, m, diff;
316   for (i = 0; i < h; ++i) {
317     for (j = 0; j < w; ++j) {
318       diff =
319           abs((int)src0[i * src0_stride + j] - (int)src1[i * src1_stride + j]);
320       m = clamp(mask_base + (diff / DIFF_FACTOR), 0, AOM_BLEND_A64_MAX_ALPHA);
321       mask[i * w + j] = which_inverse ? AOM_BLEND_A64_MAX_ALPHA - m : m;
322     }
323   }
324 }
325 
// C reference for building a difference-weighted compound mask from two 8-bit
// predictions. DIFFWTD_38 writes the mask with base 38 and DIFFWTD_38_INV
// writes its complement; any other mask_type asserts in debug builds and
// leaves `mask` untouched.
void av1_build_compound_diffwtd_mask_c(uint8_t *mask,
                                       DIFFWTD_MASK_TYPE mask_type,
                                       const uint8_t *src0, int src0_stride,
                                       const uint8_t *src1, int src1_stride,
                                       int h, int w) {
  const int is_plain = (mask_type == DIFFWTD_38);
  const int is_inverse = (mask_type == DIFFWTD_38_INV);

  if (!is_plain && !is_inverse) {
    assert(0);
    return;
  }

  diffwtd_mask(mask, is_inverse, 38, src0, src0_stride, src1, src1_stride, h,
               w);
}
341 
diffwtd_mask_highbd(uint8_t * mask,int which_inverse,int mask_base,const uint16_t * src0,int src0_stride,const uint16_t * src1,int src1_stride,int h,int w,const unsigned int bd)342 static AOM_FORCE_INLINE void diffwtd_mask_highbd(
343     uint8_t *mask, int which_inverse, int mask_base, const uint16_t *src0,
344     int src0_stride, const uint16_t *src1, int src1_stride, int h, int w,
345     const unsigned int bd) {
346   assert(bd >= 8);
347   if (bd == 8) {
348     if (which_inverse) {
349       for (int i = 0; i < h; ++i) {
350         for (int j = 0; j < w; ++j) {
351           int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
352           unsigned int m = negative_to_zero(mask_base + diff);
353           m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
354           mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
355         }
356         src0 += src0_stride;
357         src1 += src1_stride;
358         mask += w;
359       }
360     } else {
361       for (int i = 0; i < h; ++i) {
362         for (int j = 0; j < w; ++j) {
363           int diff = abs((int)src0[j] - (int)src1[j]) / DIFF_FACTOR;
364           unsigned int m = negative_to_zero(mask_base + diff);
365           m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
366           mask[j] = m;
367         }
368         src0 += src0_stride;
369         src1 += src1_stride;
370         mask += w;
371       }
372     }
373   } else {
374     const unsigned int bd_shift = bd - 8;
375     if (which_inverse) {
376       for (int i = 0; i < h; ++i) {
377         for (int j = 0; j < w; ++j) {
378           int diff =
379               (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
380           unsigned int m = negative_to_zero(mask_base + diff);
381           m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
382           mask[j] = AOM_BLEND_A64_MAX_ALPHA - m;
383         }
384         src0 += src0_stride;
385         src1 += src1_stride;
386         mask += w;
387       }
388     } else {
389       for (int i = 0; i < h; ++i) {
390         for (int j = 0; j < w; ++j) {
391           int diff =
392               (abs((int)src0[j] - (int)src1[j]) >> bd_shift) / DIFF_FACTOR;
393           unsigned int m = negative_to_zero(mask_base + diff);
394           m = AOMMIN(m, AOM_BLEND_A64_MAX_ALPHA);
395           mask[j] = m;
396         }
397         src0 += src0_stride;
398         src1 += src1_stride;
399         mask += w;
400       }
401     }
402   }
403 }
404 
// C reference for building a difference-weighted compound mask from two
// high-bit-depth predictions. src0 and src1 arrive as uint8_t pointers and are
// converted with CONVERT_TO_SHORTPTR before use. DIFFWTD_38 writes the mask
// with base 38 and DIFFWTD_38_INV writes its complement; any other mask_type
// asserts in debug builds and leaves `mask` untouched.
void av1_build_compound_diffwtd_mask_highbd_c(
    uint8_t *mask, DIFFWTD_MASK_TYPE mask_type, const uint8_t *src0,
    int src0_stride, const uint8_t *src1, int src1_stride, int h, int w,
    int bd) {
  const int is_plain = (mask_type == DIFFWTD_38);
  const int is_inverse = (mask_type == DIFFWTD_38_INV);

  if (!is_plain && !is_inverse) {
    assert(0);
    return;
  }

  if (is_inverse) {
    diffwtd_mask_highbd(mask, 1, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  } else {
    diffwtd_mask_highbd(mask, 0, 38, CONVERT_TO_SHORTPTR(src0), src0_stride,
                        CONVERT_TO_SHORTPTR(src1), src1_stride, h, w, bd);
  }
}
421 
init_wedge_master_masks()422 static void init_wedge_master_masks() {
423   int i, j;
424   const int w = MASK_MASTER_SIZE;
425   const int h = MASK_MASTER_SIZE;
426   const int stride = MASK_MASTER_STRIDE;
427 // Note: index [0] stores the masters, and [1] its complement.
428 #if USE_PRECOMPUTED_WEDGE_MASK
429   // Generate prototype by shifting the masters
430   int shift = h / 4;
431   for (i = 0; i < h; i += 2) {
432     shift_copy(wedge_master_oblique_even,
433                &wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride], shift,
434                MASK_MASTER_SIZE);
435     shift--;
436     shift_copy(wedge_master_oblique_odd,
437                &wedge_mask_obl[0][WEDGE_OBLIQUE63][(i + 1) * stride], shift,
438                MASK_MASTER_SIZE);
439     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][i * stride],
440            wedge_master_vertical,
441            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
442     memcpy(&wedge_mask_obl[0][WEDGE_VERTICAL][(i + 1) * stride],
443            wedge_master_vertical,
444            MASK_MASTER_SIZE * sizeof(wedge_master_vertical[0]));
445   }
446 #else
447   static const double smoother_param = 2.85;
448   const int a[2] = { 2, 1 };
449   const double asqrt = sqrt(a[0] * a[0] + a[1] * a[1]);
450   for (i = 0; i < h; i++) {
451     for (j = 0; j < w; ++j) {
452       int x = (2 * j + 1 - w);
453       int y = (2 * i + 1 - h);
454       double d = (a[0] * x + a[1] * y) / asqrt;
455       const int msk = (int)rint((1.0 + tanh(d / smoother_param)) * 32);
456       wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j] = msk;
457       const int mskx = (int)rint((1.0 + tanh(x / smoother_param)) * 32);
458       wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j] = mskx;
459     }
460   }
461 #endif  // USE_PRECOMPUTED_WEDGE_MASK
462   for (i = 0; i < h; ++i) {
463     for (j = 0; j < w; ++j) {
464       const int msk = wedge_mask_obl[0][WEDGE_OBLIQUE63][i * stride + j];
465       wedge_mask_obl[0][WEDGE_OBLIQUE27][j * stride + i] = msk;
466       wedge_mask_obl[0][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
467           wedge_mask_obl[0][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] =
468               (1 << WEDGE_WEIGHT_BITS) - msk;
469       wedge_mask_obl[1][WEDGE_OBLIQUE63][i * stride + j] =
470           wedge_mask_obl[1][WEDGE_OBLIQUE27][j * stride + i] =
471               (1 << WEDGE_WEIGHT_BITS) - msk;
472       wedge_mask_obl[1][WEDGE_OBLIQUE117][i * stride + w - 1 - j] =
473           wedge_mask_obl[1][WEDGE_OBLIQUE153][(w - 1 - j) * stride + i] = msk;
474       const int mskx = wedge_mask_obl[0][WEDGE_VERTICAL][i * stride + j];
475       wedge_mask_obl[0][WEDGE_HORIZONTAL][j * stride + i] = mskx;
476       wedge_mask_obl[1][WEDGE_VERTICAL][i * stride + j] =
477           wedge_mask_obl[1][WEDGE_HORIZONTAL][j * stride + i] =
478               (1 << WEDGE_WEIGHT_BITS) - mskx;
479     }
480   }
481 }
482 
483 #if !USE_PRECOMPUTED_WEDGE_SIGN
484 // If the signs for the wedges for various blocksizes are
485 // inconsistent flip the sign flag. Do it only once for every
486 // wedge codebook.
init_wedge_signs()487 static void init_wedge_signs() {
488   BLOCK_SIZE sb_type;
489   memset(wedge_signflip_lookup, 0, sizeof(wedge_signflip_lookup));
490   for (sb_type = BLOCK_4X4; sb_type < BLOCK_SIZES_ALL; ++sb_type) {
491     const int bw = block_size_wide[sb_type];
492     const int bh = block_size_high[sb_type];
493     const wedge_params_type wedge_params = wedge_params_lookup[sb_type];
494     const int wbits = wedge_params.bits;
495     const int wtypes = 1 << wbits;
496     int i, w;
497     if (wbits) {
498       for (w = 0; w < wtypes; ++w) {
499         // Get the mask master, i.e. index [0]
500         const uint8_t *mask = get_wedge_mask_inplace(w, 0, sb_type);
501         int avg = 0;
502         for (i = 0; i < bw; ++i) avg += mask[i];
503         for (i = 1; i < bh; ++i) avg += mask[i * MASK_MASTER_STRIDE];
504         avg = (avg + (bw + bh - 1) / 2) / (bw + bh - 1);
505         // Default sign of this wedge is 1 if the average < 32, 0 otherwise.
506         // If default sign is 1:
507         //   If sign requested is 0, we need to flip the sign and return
508         //   the complement i.e. index [1] instead. If sign requested is 1
509         //   we need to flip the sign and return index [0] instead.
510         // If default sign is 0:
511         //   If sign requested is 0, we need to return index [0] the master
512         //   if sign requested is 1, we need to return the complement index [1]
513         //   instead.
514         wedge_params.signflip[w] = (avg < 32);
515       }
516     }
517   }
518 }
519 #endif  // !USE_PRECOMPUTED_WEDGE_SIGN
520 
init_wedge_masks()521 static void init_wedge_masks() {
522   uint8_t *dst = wedge_mask_buf;
523   BLOCK_SIZE bsize;
524   memset(wedge_masks, 0, sizeof(wedge_masks));
525   for (bsize = BLOCK_4X4; bsize < BLOCK_SIZES_ALL; ++bsize) {
526     const uint8_t *mask;
527     const int bw = block_size_wide[bsize];
528     const int bh = block_size_high[bsize];
529     const wedge_params_type *wedge_params = &wedge_params_lookup[bsize];
530     const int wbits = wedge_params->bits;
531     const int wtypes = 1 << wbits;
532     int w;
533     if (wbits == 0) continue;
534     for (w = 0; w < wtypes; ++w) {
535       mask = get_wedge_mask_inplace(w, 0, bsize);
536       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
537                         bh);
538       wedge_params->masks[0][w] = dst;
539       dst += bw * bh;
540 
541       mask = get_wedge_mask_inplace(w, 1, bsize);
542       aom_convolve_copy(mask, MASK_MASTER_STRIDE, dst, bw, NULL, 0, NULL, 0, bw,
543                         bh);
544       wedge_params->masks[1][w] = dst;
545       dst += bw * bh;
546     }
547     assert(sizeof(wedge_mask_buf) >= (size_t)(dst - wedge_mask_buf));
548   }
549 }
550 
// Equation of line: f(x, y) = a[0]*(x - a[2]*w/8) + a[1]*(y - a[3]*h/8) = 0
//
// One-time initialisation of all wedge mask tables: builds the master planes,
// computes the sign-flip table when it is not precomputed, and then cuts the
// per-block-size masks. The order matters because each step reads what the
// previous one wrote.
//
// Declared with an explicit (void) parameter list so the definition is a
// proper prototype.
void av1_init_wedge_masks(void) {
  init_wedge_master_masks();
#if !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_signs();
#endif  // !USE_PRECOMPUTED_WEDGE_SIGN
  init_wedge_masks();
}
559 
// Blends two intermediate-precision predictions (src0, src1) into dst using
// the compound mask selected by comp_data, through the high- or low-bit-depth
// d16 blend depending on is_cur_buf_hbd(xd). The mask is passed with a row
// pitch of block_size_wide[sb_type].
static void build_masked_compound_no_round(
    uint8_t *dst, int dst_stride, const CONV_BUF_TYPE *src0, int src0_stride,
    const CONV_BUF_TYPE *src1, int src1_stride,
    const INTERINTER_COMPOUND_DATA *const comp_data, BLOCK_SIZE sb_type, int h,
    int w, ConvolveParams *conv_params, MACROBLOCKD *xd) {
  // Derive subsampling from h and w passed in. May be refactored to
  // pass in subsampling factors directly.
  const int subw = (2 << mi_size_wide_log2[sb_type]) == w;
  const int subh = (2 << mi_size_high_log2[sb_type]) == h;
  const int mask_stride = block_size_wide[sb_type];
  const uint8_t *const mask = av1_get_compound_type_mask(comp_data, sb_type);

  if (!is_cur_buf_hbd(xd)) {
    aom_lowbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                 src1_stride, mask, mask_stride, w, h, subw,
                                 subh, conv_params);
    return;
  }

  aom_highbd_blend_a64_d16_mask(dst, dst_stride, src0, src0_stride, src1,
                                src1_stride, mask, mask_stride, w, h, subw,
                                subh, conv_params, xd->bd);
}
580 
// Builds a masked compound prediction into dst.
//
// The prediction already held in conv_params->dst on entry is treated as the
// first input. This function predicts reference `ref` into a local scratch
// buffer, optionally derives the difference-weighted mask from the two
// predictions (luma plane only), and blends the two into dst.
//
// Side effects visible to the caller: xd->mi[0]->interinter_comp.seg_mask is
// pointed at xd->seg_mask, and conv_params->dst / dst_stride are redirected to
// the scratch buffer and are not restored before returning.
void av1_make_masked_inter_predictor(
    const uint8_t *pre, int pre_stride, uint8_t *dst, int dst_stride,
    const SubpelParams *subpel_params, const struct scale_factors *sf, int w,
    int h, ConvolveParams *conv_params, InterpFilters interp_filters, int plane,
    const WarpTypesAllowed *warp_types, int p_col, int p_row, int ref,
    MACROBLOCKD *xd, int can_use_previous) {
  MB_MODE_INFO *mi = xd->mi[0];
  (void)dst;
  (void)dst_stride;
  mi->interinter_comp.seg_mask = xd->seg_mask;
  const INTERINTER_COMPOUND_DATA *const comp_data = &mi->interinter_comp;

// Scratch storage for the second prediction. Two bytes per pixel so it can
// also be viewed as a CONV_BUF_TYPE array.
#define INTER_PRED_BYTES_PER_PIXEL 2
  DECLARE_ALIGNED(32, uint8_t,
                  tmp_buf[INTER_PRED_BYTES_PER_PIXEL * MAX_SB_SQUARE]);
#undef INTER_PRED_BYTES_PER_PIXEL

  uint8_t *const tmp_dst = get_buf_by_bd(xd, tmp_buf);
  CONV_BUF_TYPE *const tmp_buf16 = (CONV_BUF_TYPE *)tmp_buf;
  const int tmp_buf_stride = MAX_SB_SIZE;

  // Remember where the first prediction lives, then point the convolve output
  // at the scratch buffer for the second one.
  CONV_BUF_TYPE *const first_pred = conv_params->dst;
  const int first_pred_stride = conv_params->dst_stride;
  conv_params->dst = tmp_buf16;
  conv_params->dst_stride = tmp_buf_stride;
  assert(conv_params->do_average == 0);

  // This will generate a prediction in tmp_buf for the second reference
  av1_make_inter_predictor(pre, pre_stride, tmp_dst, tmp_buf_stride,
                           subpel_params, sf, w, h, conv_params, interp_filters,
                           warp_types, p_col, p_row, plane, ref, mi, 0, xd,
                           can_use_previous);

  // The difference-weighted mask is computed once, on plane 0, and reused.
  const int needs_diffwtd_mask =
      (plane == 0) && (comp_data->type == COMPOUND_DIFFWTD);
  if (needs_diffwtd_mask) {
    av1_build_compound_diffwtd_mask_d16(
        comp_data->seg_mask, comp_data->mask_type, first_pred,
        first_pred_stride, tmp_buf16, tmp_buf_stride, h, w, conv_params,
        xd->bd);
  }

  build_masked_compound_no_round(dst, dst_stride, first_pred,
                                 first_pred_stride, tmp_buf16, tmp_buf_stride,
                                 comp_data, mi->sb_type, h, w, conv_params, xd);
}
628 
av1_dist_wtd_comp_weight_assign(const AV1_COMMON * cm,const MB_MODE_INFO * mbmi,int order_idx,int * fwd_offset,int * bck_offset,int * use_dist_wtd_comp_avg,int is_compound)629 void av1_dist_wtd_comp_weight_assign(const AV1_COMMON *cm,
630                                      const MB_MODE_INFO *mbmi, int order_idx,
631                                      int *fwd_offset, int *bck_offset,
632                                      int *use_dist_wtd_comp_avg,
633                                      int is_compound) {
634   assert(fwd_offset != NULL && bck_offset != NULL);
635   if (!is_compound || mbmi->compound_idx) {
636     *use_dist_wtd_comp_avg = 0;
637     return;
638   }
639 
640   *use_dist_wtd_comp_avg = 1;
641   const RefCntBuffer *const bck_buf = get_ref_frame_buf(cm, mbmi->ref_frame[0]);
642   const RefCntBuffer *const fwd_buf = get_ref_frame_buf(cm, mbmi->ref_frame[1]);
643   const int cur_frame_index = cm->cur_frame->order_hint;
644   int bck_frame_index = 0, fwd_frame_index = 0;
645 
646   if (bck_buf != NULL) bck_frame_index = bck_buf->order_hint;
647   if (fwd_buf != NULL) fwd_frame_index = fwd_buf->order_hint;
648 
649   int d0 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
650                                        fwd_frame_index, cur_frame_index)),
651                  0, MAX_FRAME_DISTANCE);
652   int d1 = clamp(abs(get_relative_dist(&cm->seq_params.order_hint_info,
653                                        cur_frame_index, bck_frame_index)),
654                  0, MAX_FRAME_DISTANCE);
655 
656   const int order = d0 <= d1;
657 
658   if (d0 == 0 || d1 == 0) {
659     *fwd_offset = quant_dist_lookup_table[order_idx][3][order];
660     *bck_offset = quant_dist_lookup_table[order_idx][3][1 - order];
661     return;
662   }
663 
664   int i;
665   for (i = 0; i < 3; ++i) {
666     int c0 = quant_dist_weight[i][order];
667     int c1 = quant_dist_weight[i][!order];
668     int d0_c0 = d0 * c0;
669     int d1_c1 = d1 * c1;
670     if ((d0 > d1 && d0_c0 < d1_c1) || (d0 <= d1 && d0_c0 > d1_c1)) break;
671   }
672 
673   *fwd_offset = quant_dist_lookup_table[order_idx][i][order];
674   *bck_offset = quant_dist_lookup_table[order_idx][i][1 - order];
675 }
676 
// Points the destination buffer (pd->dst) of every plane in
// [plane_start, plane_end) at the matching plane of `src`, positioned for the
// block at (mi_row, mi_col), by calling setup_pred_plane() with no scale
// factors. Plane 0 uses index 0 of the width/height/stride arrays of `src`;
// all other planes share index 1.
void av1_setup_dst_planes(struct macroblockd_plane *planes, BLOCK_SIZE bsize,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const int plane_start, const int plane_end) {
  // The upper bound is capped at MAX_MB_PLANE (instead of using plane_end
  // directly) to quiet static analysis warnings.
  const int plane_limit = AOMMIN(plane_end, MAX_MB_PLANE);

  for (int plane = plane_start; plane < plane_limit; ++plane) {
    struct macroblockd_plane *const pd = &planes[plane];
    const int uv_idx = plane > 0;
    setup_pred_plane(&pd->dst, bsize, src->buffers[plane],
                     src->crop_widths[uv_idx], src->crop_heights[uv_idx],
                     src->strides[uv_idx], mi_row, mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
690 
// Points reference slot `idx` (pd->pre[idx]) of each of the first num_planes
// planes of `xd` at the matching plane of `src`, positioned for the block at
// (mi_row, mi_col) and using scale factors `sf`. Does nothing when `src` is
// NULL.
void av1_setup_pre_planes(MACROBLOCKD *xd, int idx,
                          const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col,
                          const struct scale_factors *sf,
                          const int num_planes) {
  if (src == NULL) return;

  // The plane count is capped at MAX_MB_PLANE (instead of using num_planes
  // directly) to quiet static analysis warnings.
  const int plane_limit = AOMMIN(num_planes, MAX_MB_PLANE);

  for (int plane = 0; plane < plane_limit; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    // Plane 0 uses entry 0 of the per-plane-type arrays, the others entry 1.
    const int uv_idx = plane > 0;
    setup_pred_plane(&pd->pre[idx], xd->mi[0]->sb_type, src->buffers[plane],
                     src->crop_widths[uv_idx], src->crop_heights[uv_idx],
                     src->strides[uv_idx], mi_row, mi_col, sf,
                     pd->subsampling_x, pd->subsampling_y);
  }
}
708 
// obmc_mask_N[overlap_position]
//
// Mask tables for overlapped block motion compensation, one table per
// supported overlap length N (1, 2, 4, 8, 16, 32 or 64). av1_get_obmc_mask()
// returns the table for a given length, and the OBMC blend callbacks in this
// file pass it as the mask argument of the aom_*blend_a64_{v,h}mask routines.
// No entry exceeds 64.
// NOTE(review): presumably entry k is the weight (out of 64) kept by the
// current block's own prediction at overlap position k -- confirm against the
// blend routine definitions.
static const uint8_t obmc_mask_1[1] = { 64 };
DECLARE_ALIGNED(2, static const uint8_t, obmc_mask_2[2]) = { 45, 64 };

DECLARE_ALIGNED(4, static const uint8_t, obmc_mask_4[4]) = { 39, 50, 59, 64 };

static const uint8_t obmc_mask_8[8] = { 36, 42, 48, 53, 57, 61, 64, 64 };

static const uint8_t obmc_mask_16[16] = { 34, 37, 40, 43, 46, 49, 52, 54,
                                          56, 58, 60, 61, 64, 64, 64, 64 };

static const uint8_t obmc_mask_32[32] = { 33, 35, 36, 38, 40, 41, 43, 44,
                                          45, 47, 48, 50, 51, 52, 53, 55,
                                          56, 57, 58, 59, 60, 60, 61, 62,
                                          64, 64, 64, 64, 64, 64, 64, 64 };

static const uint8_t obmc_mask_64[64] = {
  33, 34, 35, 35, 36, 37, 38, 39, 40, 40, 41, 42, 43, 44, 44, 44,
  45, 46, 47, 47, 48, 49, 50, 51, 51, 51, 52, 52, 53, 54, 55, 56,
  56, 56, 57, 57, 58, 58, 59, 60, 60, 60, 60, 60, 61, 62, 62, 62,
  62, 62, 63, 63, 63, 63, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64,
};
731 
av1_get_obmc_mask(int length)732 const uint8_t *av1_get_obmc_mask(int length) {
733   switch (length) {
734     case 1: return obmc_mask_1;
735     case 2: return obmc_mask_2;
736     case 4: return obmc_mask_4;
737     case 8: return obmc_mask_8;
738     case 16: return obmc_mask_16;
739     case 32: return obmc_mask_32;
740     case 64: return obmc_mask_64;
741     default: assert(0); return NULL;
742   }
743 }
744 
increment_int_ptr(MACROBLOCKD * xd,int rel_mi_rc,uint8_t mi_hw,MB_MODE_INFO * mi,void * fun_ctxt,const int num_planes)745 static INLINE void increment_int_ptr(MACROBLOCKD *xd, int rel_mi_rc,
746                                      uint8_t mi_hw, MB_MODE_INFO *mi,
747                                      void *fun_ctxt, const int num_planes) {
748   (void)xd;
749   (void)rel_mi_rc;
750   (void)mi_hw;
751   (void)mi;
752   ++*(int *)fun_ctxt;
753   (void)num_planes;
754 }
755 
// Counts the overlappable neighbors of the current block (xd->mi[0]) and
// stores the totals in its overlappable_neighbors[] field: entry 0 for the
// above side, entry 1 for the left side. Both stay zero when the block size
// does not allow motion variation.
void av1_count_overlappable_neighbors(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                      int mi_row, int mi_col) {
  MB_MODE_INFO *const cur_mbmi = xd->mi[0];

  cur_mbmi->overlappable_neighbors[0] = 0;
  cur_mbmi->overlappable_neighbors[1] = 0;

  if (is_motion_variation_allowed_bsize(cur_mbmi->sb_type)) {
    // INT_MAX: no limit on how many neighbors are visited; the callback
    // increments the counter once per visit.
    foreach_overlappable_nb_above(cm, xd, mi_col, INT_MAX, increment_int_ptr,
                                  &cur_mbmi->overlappable_neighbors[0]);
    foreach_overlappable_nb_left(cm, xd, mi_row, INT_MAX, increment_int_ptr,
                                 &cur_mbmi->overlappable_neighbors[1]);
  }
}
770 
// HW does not support < 4x4 prediction. To limit the bandwidth requirement,
// when the block size of the current plane is smaller than 8x8 (4x4, 8x4 or
// 4x8), blend only with the left neighbor(s) and skip blending with the above
// side. Setting the switch below to 1 instead skips OBMC blending for such
// planes in both directions.
#define DISABLE_CHROMA_U8X8_OBMC 0  // 0: one-sided obmc; 1: disable
775 
// Tells whether OBMC blending must be skipped for one plane of a block.
//
//   bsize - size of the block; must allow motion variation.
//   pd    - plane whose subsampling determines the plane-level block size.
//   dir   - blending direction: the above-neighbor pass calls this with 0,
//           the left-neighbor pass with 1.
//
// Returns 0 unless the plane-level block is 4x4, 8x4 or 4x8. For those sizes
// the result is 1 when DISABLE_CHROMA_U8X8_OBMC is set, and (dir == 0)
// otherwise, i.e. only the above-side blend is skipped.
int av1_skip_u4x4_pred_in_obmc(BLOCK_SIZE bsize,
                               const struct macroblockd_plane *pd, int dir) {
  assert(is_motion_variation_allowed_bsize(bsize));

  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const int is_sub8x8 = plane_bsize == BLOCK_4X4 ||
                        plane_bsize == BLOCK_8X4 || plane_bsize == BLOCK_4X8;
  if (!is_sub8x8) return 0;

#if DISABLE_CHROMA_U8X8_OBMC
  return 1;
#else
  return dir == 0;
#endif
}
795 
// Rewrites a neighbor's mode info in place so that the OBMC predictor built
// from it uses a single reference: the second reference frame is cleared to
// NONE_FRAME and the inter-inter compound type is reset to COMPOUND_AVERAGE.
void av1_modify_neighbor_predictor_for_obmc(MB_MODE_INFO *mbmi) {
  mbmi->interinter_comp.type = COMPOUND_AVERAGE;
  mbmi->ref_frame[1] = NONE_FRAME;
}
802 
// Context handed, through the void *fun_ctxt argument, to the per-neighbor
// OBMC blend callbacks in this file. It is initialized positionally by its
// user, so the field order must not change.
struct obmc_inter_pred_ctxt {
  // Per-plane base pointers of the predictions built from the neighbors.
  uint8_t **adjacent;
  // Per-plane strides of the buffers in `adjacent`.
  int *adjacent_stride;
};
807 
build_obmc_inter_pred_above(MACROBLOCKD * xd,int rel_mi_col,uint8_t above_mi_width,MB_MODE_INFO * above_mi,void * fun_ctxt,const int num_planes)808 static INLINE void build_obmc_inter_pred_above(MACROBLOCKD *xd, int rel_mi_col,
809                                                uint8_t above_mi_width,
810                                                MB_MODE_INFO *above_mi,
811                                                void *fun_ctxt,
812                                                const int num_planes) {
813   (void)above_mi;
814   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
815   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
816   const int is_hbd = is_cur_buf_hbd(xd);
817   const int overlap =
818       AOMMIN(block_size_high[bsize], block_size_high[BLOCK_64X64]) >> 1;
819 
820   for (int plane = 0; plane < num_planes; ++plane) {
821     const struct macroblockd_plane *pd = &xd->plane[plane];
822     const int bw = (above_mi_width * MI_SIZE) >> pd->subsampling_x;
823     const int bh = overlap >> pd->subsampling_y;
824     const int plane_col = (rel_mi_col * MI_SIZE) >> pd->subsampling_x;
825 
826     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 0)) continue;
827 
828     const int dst_stride = pd->dst.stride;
829     uint8_t *const dst = &pd->dst.buf[plane_col];
830     const int tmp_stride = ctxt->adjacent_stride[plane];
831     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_col];
832     const uint8_t *const mask = av1_get_obmc_mask(bh);
833 
834     if (is_hbd)
835       aom_highbd_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp,
836                                  tmp_stride, mask, bw, bh, xd->bd);
837     else
838       aom_blend_a64_vmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
839                           mask, bw, bh);
840   }
841 }
842 
build_obmc_inter_pred_left(MACROBLOCKD * xd,int rel_mi_row,uint8_t left_mi_height,MB_MODE_INFO * left_mi,void * fun_ctxt,const int num_planes)843 static INLINE void build_obmc_inter_pred_left(MACROBLOCKD *xd, int rel_mi_row,
844                                               uint8_t left_mi_height,
845                                               MB_MODE_INFO *left_mi,
846                                               void *fun_ctxt,
847                                               const int num_planes) {
848   (void)left_mi;
849   struct obmc_inter_pred_ctxt *ctxt = (struct obmc_inter_pred_ctxt *)fun_ctxt;
850   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
851   const int overlap =
852       AOMMIN(block_size_wide[bsize], block_size_wide[BLOCK_64X64]) >> 1;
853   const int is_hbd = is_cur_buf_hbd(xd);
854 
855   for (int plane = 0; plane < num_planes; ++plane) {
856     const struct macroblockd_plane *pd = &xd->plane[plane];
857     const int bw = overlap >> pd->subsampling_x;
858     const int bh = (left_mi_height * MI_SIZE) >> pd->subsampling_y;
859     const int plane_row = (rel_mi_row * MI_SIZE) >> pd->subsampling_y;
860 
861     if (av1_skip_u4x4_pred_in_obmc(bsize, pd, 1)) continue;
862 
863     const int dst_stride = pd->dst.stride;
864     uint8_t *const dst = &pd->dst.buf[plane_row * dst_stride];
865     const int tmp_stride = ctxt->adjacent_stride[plane];
866     const uint8_t *const tmp = &ctxt->adjacent[plane][plane_row * tmp_stride];
867     const uint8_t *const mask = av1_get_obmc_mask(bw);
868 
869     if (is_hbd)
870       aom_highbd_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp,
871                                  tmp_stride, mask, bw, bh, xd->bd);
872     else
873       aom_blend_a64_hmask(dst, dst_stride, dst, dst_stride, tmp, tmp_stride,
874                           mask, bw, bh);
875   }
876 }
877 
878 // This function combines motion compensated predictions that are generated by
879 // top/left neighboring blocks' inter predictors with the regular inter
880 // prediction. We assume the original prediction (bmc) is stored in
881 // xd->plane[].dst.buf
av1_build_obmc_inter_prediction(const AV1_COMMON * cm,MACROBLOCKD * xd,int mi_row,int mi_col,uint8_t * above[MAX_MB_PLANE],int above_stride[MAX_MB_PLANE],uint8_t * left[MAX_MB_PLANE],int left_stride[MAX_MB_PLANE])882 void av1_build_obmc_inter_prediction(const AV1_COMMON *cm, MACROBLOCKD *xd,
883                                      int mi_row, int mi_col,
884                                      uint8_t *above[MAX_MB_PLANE],
885                                      int above_stride[MAX_MB_PLANE],
886                                      uint8_t *left[MAX_MB_PLANE],
887                                      int left_stride[MAX_MB_PLANE]) {
888   const BLOCK_SIZE bsize = xd->mi[0]->sb_type;
889 
890   // handle above row
891   struct obmc_inter_pred_ctxt ctxt_above = { above, above_stride };
892   foreach_overlappable_nb_above(cm, xd, mi_col,
893                                 max_neighbor_obmc[mi_size_wide_log2[bsize]],
894                                 build_obmc_inter_pred_above, &ctxt_above);
895 
896   // handle left column
897   struct obmc_inter_pred_ctxt ctxt_left = { left, left_stride };
898   foreach_overlappable_nb_left(cm, xd, mi_row,
899                                max_neighbor_obmc[mi_size_high_log2[bsize]],
900                                build_obmc_inter_pred_left, &ctxt_left);
901 }
902 
// Prepares `xd` for building the prediction that an above neighbor's motion
// would produce over the current block:
//   - the neighbor's mode info is modified in place for OBMC use,
//   - every plane's dst is pointed at the temporary buffers in `ctxt`,
//   - scale factors and reference (pre) planes are set up per reference,
//   - xd->mb_to_left_edge / xd->mb_to_right_edge are updated for the
//     neighbor's column span.
// Raises AOM_CODEC_UNSUP_BITSTREAM through aom_internal_error() when a
// reference has invalid scale factors.
//
//   rel_mi_col     - column offset of the neighbor from ctxt->mi_col, in MI
//                    units.
//   above_mi_width - width of the neighbor, in MI units.
void av1_setup_build_prediction_by_above_pred(
    MACROBLOCKD *xd, int rel_mi_col, uint8_t above_mi_width,
    MB_MODE_INFO *above_mbmi, struct build_prediction_ctxt *ctxt,
    const int num_planes) {
  // Block sizes below 8x8 are treated as 8x8 here.
  const BLOCK_SIZE nb_bsize = AOMMAX(BLOCK_8X8, above_mbmi->sb_type);
  const int nb_mi_col = ctxt->mi_col + rel_mi_col;

  av1_modify_neighbor_predictor_for_obmc(above_mbmi);

  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, nb_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], 0, rel_mi_col, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  const int ref_count = 1 + has_second_ref(above_mbmi);

  for (int ref = 0; ref < ref_count; ++ref) {
    const MV_REFERENCE_FRAME ref_frame = above_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, ref_frame);
    const struct scale_factors *const ref_sf =
        get_ref_scale_factors_const(ctxt->cm, ref_frame);

    // The scale factors are recorded before being validated.
    xd->block_ref_scale_factors[ref] = ref_sf;
    if (!av1_is_valid_scale(ref_sf)) {
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    }
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, ctxt->mi_row, nb_mi_col,
                         ref_sf, num_planes);
  }

  // MI columns of the current block lying to the right of this neighbor.
  const int mi_cols_after = xd->n4_w - rel_mi_col - above_mi_width;
  xd->mb_to_left_edge = 8 * MI_SIZE * (-nb_mi_col);
  xd->mb_to_right_edge = ctxt->mb_to_far_edge + mi_cols_after * MI_SIZE * 8;
}
939 
// Prepares `xd` for building the prediction that a left neighbor's motion
// would produce over the current block:
//   - the neighbor's mode info is modified in place for OBMC use,
//   - every plane's dst is pointed at the temporary buffers in `ctxt`,
//   - scale factors and reference (pre) planes are set up per reference,
//   - xd->mb_to_top_edge / xd->mb_to_bottom_edge are updated for the
//     neighbor's row span.
// Raises AOM_CODEC_UNSUP_BITSTREAM through aom_internal_error() when a
// reference has invalid scale factors.
//
//   rel_mi_row     - row offset of the neighbor from ctxt->mi_row, in MI
//                    units.
//   left_mi_height - height of the neighbor, in MI units.
void av1_setup_build_prediction_by_left_pred(MACROBLOCKD *xd, int rel_mi_row,
                                             uint8_t left_mi_height,
                                             MB_MODE_INFO *left_mbmi,
                                             struct build_prediction_ctxt *ctxt,
                                             const int num_planes) {
  // Block sizes below 8x8 are treated as 8x8 here.
  const BLOCK_SIZE nb_bsize = AOMMAX(BLOCK_8X8, left_mbmi->sb_type);
  const int nb_mi_row = ctxt->mi_row + rel_mi_row;

  av1_modify_neighbor_predictor_for_obmc(left_mbmi);

  for (int plane = 0; plane < num_planes; ++plane) {
    struct macroblockd_plane *const pd = &xd->plane[plane];
    setup_pred_plane(&pd->dst, nb_bsize, ctxt->tmp_buf[plane],
                     ctxt->tmp_width[plane], ctxt->tmp_height[plane],
                     ctxt->tmp_stride[plane], rel_mi_row, 0, NULL,
                     pd->subsampling_x, pd->subsampling_y);
  }

  const int ref_count = 1 + has_second_ref(left_mbmi);

  for (int ref = 0; ref < ref_count; ++ref) {
    const MV_REFERENCE_FRAME ref_frame = left_mbmi->ref_frame[ref];
    const RefCntBuffer *const ref_buf = get_ref_frame_buf(ctxt->cm, ref_frame);
    const struct scale_factors *const ref_sf =
        get_ref_scale_factors_const(ctxt->cm, ref_frame);

    // The scale factors are recorded before being validated.
    xd->block_ref_scale_factors[ref] = ref_sf;
    if (!av1_is_valid_scale(ref_sf)) {
      aom_internal_error(xd->error_info, AOM_CODEC_UNSUP_BITSTREAM,
                         "Reference frame has invalid dimensions");
    }
    av1_setup_pre_planes(xd, ref, &ref_buf->buf, nb_mi_row, ctxt->mi_col,
                         ref_sf, num_planes);
  }

  // MI rows of the current block lying below this neighbor.
  const int mi_rows_after = xd->n4_h - rel_mi_row - left_mi_height;
  xd->mb_to_top_edge = 8 * MI_SIZE * (-nb_mi_row);
  xd->mb_to_bottom_edge = ctxt->mb_to_far_edge + mi_rows_after * MI_SIZE * 8;
}
979 
980 /* clang-format off */
981 static const uint8_t ii_weights1d[MAX_SB_SIZE] = {
982   60, 58, 56, 54, 52, 50, 48, 47, 45, 44, 42, 41, 39, 38, 37, 35, 34, 33, 32,
983   31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 22, 21, 20, 19, 19, 18, 18, 17, 16,
984   16, 15, 15, 14, 14, 13, 13, 12, 12, 12, 11, 11, 10, 10, 10,  9,  9,  9,  8,
985   8,  8,  8,  7,  7,  7,  7,  6,  6,  6,  6,  6,  5,  5,  5,  5,  5,  4,  4,
986   4,  4,  4,  4,  4,  4,  3,  3,  3,  3,  3,  3,  3,  3,  3,  2,  2,  2,  2,
987   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  1,  1,  1,  1,  1,  1,  1,  1,
988   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1
989 };
990 static uint8_t ii_size_scales[BLOCK_SIZES_ALL] = {
991     32, 16, 16, 16, 8, 8, 8, 4,
992     4,  4,  2,  2,  2, 1, 1, 1,
993     8,  8,  4,  4,  2, 2
994 };
995 /* clang-format on */
996 
// Fills a (block width) x (block height) mask for non-wedge inter-intra
// blending of a plane block of size plane_bsize.
//
//   mask   - output buffer; successive rows are `stride` bytes apart.
//   mode   - selects the mask shape:
//              II_V_PRED      - value depends on the row only,
//              II_H_PRED      - value depends on the column only,
//              II_SMOOTH_PRED - value depends on min(row, column),
//              II_DC_PRED and anything else - constant 32.
//            Position-dependent values come from ii_weights1d, stepped by
//            ii_size_scales[plane_bsize].
static void build_smooth_interintra_mask(uint8_t *mask, int stride,
                                         BLOCK_SIZE plane_bsize,
                                         INTERINTRA_MODE mode) {
  const int width = block_size_wide[plane_bsize];
  const int height = block_size_high[plane_bsize];
  const int step = ii_size_scales[plane_bsize];
  uint8_t *row_ptr = mask;

  for (int row = 0; row < height; ++row, row_ptr += stride) {
    switch (mode) {
      case II_V_PRED:
        memset(row_ptr, ii_weights1d[row * step], width * sizeof(row_ptr[0]));
        break;

      case II_H_PRED:
        for (int col = 0; col < width; ++col) {
          row_ptr[col] = ii_weights1d[col * step];
        }
        break;

      case II_SMOOTH_PRED:
        for (int col = 0; col < width; ++col) {
          const int nearest = (row < col) ? row : col;
          row_ptr[col] = ii_weights1d[nearest * step];
        }
        break;

      case II_DC_PRED:
      default: memset(row_ptr, 32, width * sizeof(row_ptr[0])); break;
    }
  }
}
1037 
// Blends an intra and an inter prediction into comppred (low bit depth).
//
// Without wedge inter-intra, a mask is generated for `mode` at plane block
// size and used directly. With wedge inter-intra, the wedge mask selected by
// (wedge_index, wedge_sign, bsize) is used, sub-sampled horizontally and/or
// vertically when the plane block is smaller than the full block; if wedge
// inter-intra is not available for bsize, comppred is left untouched.
static void combine_interintra(INTERINTRA_MODE mode,
                               int8_t use_wedge_interintra, int wedge_index,
                               int wedge_sign, BLOCK_SIZE bsize,
                               BLOCK_SIZE plane_bsize, uint8_t *comppred,
                               int compstride, const uint8_t *interpred,
                               int interstride, const uint8_t *intrapred,
                               int intrastride) {
  const int plane_w = block_size_wide[plane_bsize];
  const int plane_h = block_size_high[plane_bsize];

  if (!use_wedge_interintra) {
    uint8_t smooth_mask[MAX_SB_SQUARE];
    build_smooth_interintra_mask(smooth_mask, plane_w, plane_bsize, mode);
    aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                       interstride, smooth_mask, plane_w, plane_w, plane_h, 0,
                       0);
    return;
  }

  if (!is_interintra_wedge_used(bsize)) return;

  const uint8_t *const wedge_mask =
      av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
  // The wedge mask is laid out at full block resolution; flag each dimension
  // in which the plane block has half as many samples.
  const int subw = 2 * mi_size_wide[bsize] == plane_w;
  const int subh = 2 * mi_size_high[bsize] == plane_h;
  aom_blend_a64_mask(comppred, compstride, intrapred, intrastride, interpred,
                     interstride, wedge_mask, block_size_wide[bsize], plane_w,
                     plane_h, subw, subh);
}
1066 
// High bit depth counterpart of combine_interintra(): blends an intra and an
// inter prediction into comppred8 at bit depth `bd`.
//
// Without wedge inter-intra, a mask is generated for `mode` at plane block
// size and used directly. With wedge inter-intra, the wedge mask selected by
// (wedge_index, wedge_sign, bsize) is used, sub-sampled horizontally and/or
// vertically when the plane block is smaller than the full block; if wedge
// inter-intra is not available for bsize, comppred8 is left untouched.
static void combine_interintra_highbd(
    INTERINTRA_MODE mode, int8_t use_wedge_interintra, int wedge_index,
    int wedge_sign, BLOCK_SIZE bsize, BLOCK_SIZE plane_bsize,
    uint8_t *comppred8, int compstride, const uint8_t *interpred8,
    int interstride, const uint8_t *intrapred8, int intrastride, int bd) {
  const int plane_w = block_size_wide[plane_bsize];
  const int plane_h = block_size_high[plane_bsize];

  if (!use_wedge_interintra) {
    uint8_t smooth_mask[MAX_SB_SQUARE];
    build_smooth_interintra_mask(smooth_mask, plane_w, plane_bsize, mode);
    aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                              interpred8, interstride, smooth_mask, plane_w,
                              plane_w, plane_h, 0, 0, bd);
    return;
  }

  if (!is_interintra_wedge_used(bsize)) return;

  const uint8_t *const wedge_mask =
      av1_get_contiguous_soft_mask(wedge_index, wedge_sign, bsize);
  // The wedge mask is laid out at full block resolution; flag each dimension
  // in which the plane block has half as many samples.
  const int subw = 2 * mi_size_wide[bsize] == plane_w;
  const int subh = 2 * mi_size_high[bsize] == plane_h;
  aom_highbd_blend_a64_mask(comppred8, compstride, intrapred8, intrastride,
                            interpred8, interstride, wedge_mask,
                            block_size_wide[bsize], plane_w, plane_h, subw,
                            subh, bd);
}
1094 
// Generates the intra half of an inter-intra prediction for one plane and
// writes it to `dst`.
//
// The intra mode is derived from the block's interintra_mode. The block is
// expected to have zero angle deltas, no filter-intra and no intrabc; this is
// asserted. Reference samples are read from ctx->plane[plane] with stride
// ctx->stride[plane].
void av1_build_intra_predictors_for_interintra(const AV1_COMMON *cm,
                                               MACROBLOCKD *xd,
                                               BLOCK_SIZE bsize, int plane,
                                               const BUFFER_SET *ctx,
                                               uint8_t *dst, int dst_stride) {
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_Y] == 0);
  assert(xd->mi[0]->angle_delta[PLANE_TYPE_UV] == 0);
  assert(xd->mi[0]->filter_intra_mode_info.use_filter_intra == 0);
  assert(xd->mi[0]->use_intrabc == 0);

  struct macroblockd_plane *const pd = &xd->plane[plane];
  const BLOCK_SIZE plane_bsize =
      get_plane_block_size(bsize, pd->subsampling_x, pd->subsampling_y);
  const PREDICTION_MODE intra_mode =
      interintra_to_intra_mode[xd->mi[0]->interintra_mode];

  // Predict the whole plane block in one go, using the largest transform
  // size listed for it in max_txsize_rect_lookup.
  av1_predict_intra_block(cm, xd, pd->width, pd->height,
                          max_txsize_rect_lookup[plane_bsize], intra_mode, 0,
                          0, FILTER_INTRA_MODES, ctx->plane[plane],
                          ctx->stride[plane], dst, dst_stride, 0, 0, plane);
}
1115 
// Blends the given inter and intra predictions of one plane into that plane's
// destination buffer (xd->plane[plane].dst), using the inter-intra mode and
// wedge parameters stored in the current block's mode info. Dispatches to the
// high or low bit depth combiner according to the current buffer.
void av1_combine_interintra(MACROBLOCKD *xd, BLOCK_SIZE bsize, int plane,
                            const uint8_t *inter_pred, int inter_stride,
                            const uint8_t *intra_pred, int intra_stride) {
  const BLOCK_SIZE plane_bsize = get_plane_block_size(
      bsize, xd->plane[plane].subsampling_x, xd->plane[plane].subsampling_y);
  uint8_t *const out = xd->plane[plane].dst.buf;
  const int out_stride = xd->plane[plane].dst.stride;

  if (is_cur_buf_hbd(xd)) {
    combine_interintra_highbd(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
        bsize, plane_bsize, out, out_stride, inter_pred, inter_stride,
        intra_pred, intra_stride, xd->bd);
  } else {
    combine_interintra(
        xd->mi[0]->interintra_mode, xd->mi[0]->use_wedge_interintra,
        xd->mi[0]->interintra_wedge_index, xd->mi[0]->interintra_wedge_sign,
        bsize, plane_bsize, out, out_stride, inter_pred, inter_stride,
        intra_pred, intra_stride);
  }
}
1137 
1138 // build interintra_predictors for one plane
av1_build_interintra_predictors_sbp(const AV1_COMMON * cm,MACROBLOCKD * xd,uint8_t * pred,int stride,const BUFFER_SET * ctx,int plane,BLOCK_SIZE bsize)1139 void av1_build_interintra_predictors_sbp(const AV1_COMMON *cm, MACROBLOCKD *xd,
1140                                          uint8_t *pred, int stride,
1141                                          const BUFFER_SET *ctx, int plane,
1142                                          BLOCK_SIZE bsize) {
1143   if (is_cur_buf_hbd(xd)) {
1144     DECLARE_ALIGNED(16, uint16_t, intrapredictor[MAX_SB_SQUARE]);
1145     av1_build_intra_predictors_for_interintra(
1146         cm, xd, bsize, plane, ctx, CONVERT_TO_BYTEPTR(intrapredictor),
1147         MAX_SB_SIZE);
1148     av1_combine_interintra(xd, bsize, plane, pred, stride,
1149                            CONVERT_TO_BYTEPTR(intrapredictor), MAX_SB_SIZE);
1150   } else {
1151     DECLARE_ALIGNED(16, uint8_t, intrapredictor[MAX_SB_SQUARE]);
1152     av1_build_intra_predictors_for_interintra(cm, xd, bsize, plane, ctx,
1153                                               intrapredictor, MAX_SB_SIZE);
1154     av1_combine_interintra(xd, bsize, plane, pred, stride, intrapredictor,
1155                            MAX_SB_SIZE);
1156   }
1157 }
1158 
// Builds the inter-intra predictions for planes 1 and 2, in that order, using
// upred/ustride for plane 1 and vpred/vstride for plane 2.
void av1_build_interintra_predictors_sbuv(const AV1_COMMON *cm, MACROBLOCKD *xd,
                                          uint8_t *upred, uint8_t *vpred,
                                          int ustride, int vstride,
                                          const BUFFER_SET *ctx,
                                          BLOCK_SIZE bsize) {
  uint8_t *const chroma_pred[2] = { upred, vpred };
  const int chroma_stride[2] = { ustride, vstride };

  for (int uv = 0; uv < 2; ++uv) {
    av1_build_interintra_predictors_sbp(cm, xd, chroma_pred[uv],
                                        chroma_stride[uv], ctx, uv + 1, bsize);
  }
}
1167