1 /*
2  * Copyright (c) 2016, Alliance for Open Media. All rights reserved
3  *
4  * This source code is subject to the terms of the BSD 2 Clause License and
5  * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
6  * was not distributed with this source code in the LICENSE file, you can
7  * obtain it at www.aomedia.org/license/software. If the Alliance for Open
8  * Media Patent License 1.0 was not distributed with this source code in the
9  * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
10  */
11 
12 #include <assert.h>
13 #include <math.h>
14 #include <string.h>
15 
16 #include "config/aom_scale_rtcd.h"
17 
18 #include "aom/aom_integer.h"
19 #include "av1/common/cdef.h"
20 #include "av1/common/cdef_block.h"
21 #include "av1/common/onyxc_int.h"
22 #include "av1/common/reconinter.h"
23 
sb_all_skip(const AV1_COMMON * const cm,int mi_row,int mi_col)24 int sb_all_skip(const AV1_COMMON *const cm, int mi_row, int mi_col) {
25   int maxc, maxr;
26   int skip = 1;
27   maxc = cm->mi_cols - mi_col;
28   maxr = cm->mi_rows - mi_row;
29 
30   maxr = AOMMIN(maxr, MI_SIZE_64X64);
31   maxc = AOMMIN(maxc, MI_SIZE_64X64);
32 
33   for (int r = 0; r < maxr; r++) {
34     for (int c = 0; c < maxc; c++) {
35       skip =
36           skip &&
37           cm->mi_grid_visible[(mi_row + r) * cm->mi_stride + mi_col + c]->skip;
38     }
39   }
40   return skip;
41 }
42 
/* Bitwise-ANDs the skip flags of all mode-info entries covering the 8x8
 * block whose top-left entry is at (mi_row, mi_col) in `grid`, starting from
 * 1. `mi_stride` is the number of grid entries per row. */
static int is_8x8_block_skip(MB_MODE_INFO **grid, int mi_row, int mi_col,
                             int mi_stride) {
  const int rows = mi_size_high[BLOCK_8X8];
  const int cols = mi_size_wide[BLOCK_8X8];
  int all_skip = 1;

  for (int dr = 0; dr < rows; ++dr) {
    const int row_base = (mi_row + dr) * mi_stride;
    for (int dc = 0; dc < cols; ++dc) {
      all_skip &= grid[row_base + (mi_col + dc)]->skip;
    }
  }
  return all_skip;
}
52 
/* Builds the list of 8x8 blocks that are not skipped inside the block of
 * size `bs` whose top-left mode-info position is (mi_row, mi_col).
 *
 * The scanned area is MI_SIZE_128X128 wide for BLOCK_128X128/BLOCK_128X64
 * and MI_SIZE_128X128 high for BLOCK_128X128/BLOCK_64X128; otherwise the
 * respective limit is MI_SIZE_64X64. It is further clipped to the frame
 * (cm->mi_cols / cm->mi_rows).
 *
 * Each entry written to `dlist` holds the block position (by, bx) in units
 * of 8x8 blocks, relative to (mi_row, mi_col). Returns the number of entries
 * written. */
int sb_compute_cdef_list(const AV1_COMMON *const cm, int mi_row, int mi_col,
                         cdef_list *dlist, BLOCK_SIZE bs) {
  MB_MODE_INFO **const mi_grid = cm->mi_grid_visible;

  const int is_128_wide = (bs == BLOCK_128X128 || bs == BLOCK_128X64);
  const int is_128_high = (bs == BLOCK_128X128 || bs == BLOCK_64X128);
  const int col_limit = is_128_wide ? MI_SIZE_128X128 : MI_SIZE_64X64;
  const int row_limit = is_128_high ? MI_SIZE_128X128 : MI_SIZE_64X64;
  const int col_count = AOMMIN(cm->mi_cols - mi_col, col_limit);
  const int row_count = AOMMIN(cm->mi_rows - mi_row, row_limit);

  /* Number of mode-info units per 8x8 block in each direction. */
  const int row_inc = mi_size_high[BLOCK_8X8];
  const int col_inc = mi_size_wide[BLOCK_8X8];
  /* Shift that turns a mode-info offset into an 8x8-block index. */
  const int row_shift = (row_inc == 2);
  const int col_shift = (col_inc == 2);

  assert(row_inc == 1 || row_inc == 2);
  assert(col_inc == 1 || col_inc == 2);

  int num_blocks = 0;
  for (int dr = 0; dr < row_count; dr += row_inc) {
    for (int dc = 0; dc < col_count; dc += col_inc) {
      if (is_8x8_block_skip(mi_grid, mi_row + dr, mi_col + dc,
                            cm->mi_stride)) {
        continue;
      }
      dlist[num_blocks].by = dr >> row_shift;
      dlist[num_blocks].bx = dc >> col_shift;
      ++num_blocks;
    }
  }
  return num_blocks;
}
89 
/* Copies a rectangle of `v` rows by `h` columns of 8-bit samples from `src`
 * (row pitch `sstride`) into 16-bit samples at `dst` (row pitch `dstride`),
 * widening each value. Both pitches are in elements. */
void copy_rect8_8bit_to_16bit_c(uint16_t *dst, int dstride, const uint8_t *src,
                                int sstride, int v, int h) {
  for (int row = 0; row < v; ++row) {
    const int dst_base = row * dstride;
    const int src_base = row * sstride;
    for (int col = 0; col < h; ++col) {
      dst[dst_base + col] = src[src_base + col];
    }
  }
}
98 
/* Copies a rectangle of `v` rows by `h` columns of 16-bit samples from `src`
 * (row pitch `sstride`) to `dst` (row pitch `dstride`). Both pitches are in
 * elements. */
void copy_rect8_16bit_to_16bit_c(uint16_t *dst, int dstride,
                                 const uint16_t *src, int sstride, int v,
                                 int h) {
  for (int row = 0; row < v; ++row) {
    const int dst_base = row * dstride;
    const int src_base = row * sstride;
    for (int col = 0; col < h; ++col) {
      dst[dst_base + col] = src[src_base + col];
    }
  }
}
108 
/* Copies a `vsize` x `hsize` rectangle of frame samples into the 16-bit
 * buffer `dst` (row pitch `dstride`).
 *
 * The rectangle starts `src_voffset` rows and `src_hoffset` columns into the
 * plane addressed by `src` (row pitch `sstride`). When
 * cm->seq_params.use_highbitdepth is set, `src` is converted with
 * CONVERT_TO_SHORTPTR and read as 16-bit samples; otherwise it is read as
 * 8-bit samples and widened. */
static void copy_sb8_16(AV1_COMMON *cm, uint16_t *dst, int dstride,
                        const uint8_t *src, int src_voffset, int src_hoffset,
                        int sstride, int vsize, int hsize) {
  /* Element offset of the rectangle's top-left sample within the plane. */
  const int start = src_voffset * sstride + src_hoffset;

  if (!cm->seq_params.use_highbitdepth) {
    const uint8_t *const from8 = &src[start];
    copy_rect8_8bit_to_16bit(dst, dstride, from8, sstride, vsize, hsize);
    return;
  }

  const uint16_t *const from16 = &CONVERT_TO_SHORTPTR(src)[start];
  copy_rect8_16bit_to_16bit(dst, dstride, from16, sstride, vsize, hsize);
}
121 
fill_rect(uint16_t * dst,int dstride,int v,int h,uint16_t x)122 static INLINE void fill_rect(uint16_t *dst, int dstride, int v, int h,
123                              uint16_t x) {
124   for (int i = 0; i < v; i++) {
125     for (int j = 0; j < h; j++) {
126       dst[i * dstride + j] = x;
127     }
128   }
129 }
130 
copy_rect(uint16_t * dst,int dstride,const uint16_t * src,int sstride,int v,int h)131 static INLINE void copy_rect(uint16_t *dst, int dstride, const uint16_t *src,
132                              int sstride, int v, int h) {
133   for (int i = 0; i < v; i++) {
134     for (int j = 0; j < h; j++) {
135       dst[i * dstride + j] = src[i * sstride + j];
136     }
137   }
138 }
139 
/* Runs the CDEF filter over `frame`, in place, one 64x64 filter block at a
 * time.
 *
 * frame: buffer to filter. Its planes are bound to xd->plane[] through
 *        av1_setup_dst_planes() and are read and written via
 *        xd->plane[].dst.
 * cm:    provides the frame size in mode-info units (mi_rows / mi_cols), the
 *        visible mode-info grid, the sequence parameters (bit depth,
 *        high-bitdepth flag, superblock size) and the CDEF parameters in
 *        cm->cdef_info.
 * xd:    its plane[] destination pointers are overwritten to refer to
 *        `frame`.
 *
 * For every filter block and plane the function assembles a padded copy of
 * the block in the local 16-bit buffer `src` (CDEF_VBORDER rows above/below
 * and CDEF_HBORDER columns left/right) and hands it to cdef_filter_fb().
 * Because filtering is done in place, border pixels that belong to
 * neighbours which were already filtered are taken from copies saved before
 * those neighbours were filtered:
 *   - linebuf[pli]: the last CDEF_VBORDER rows of each filtered block,
 *     indexed by plane column, consumed by the filter-block row below;
 *   - colbuf[pli]:  the last CDEF_HBORDER columns of the padded buffer of
 *     the block just processed, consumed by the block to its right.
 * Positions outside the frame are filled with CDEF_VERY_LARGE.
 *
 * NOTE(review): the results of aom_malloc() are used without a NULL check,
 * as in the original code. How allocation failure should be reported from
 * this void function depends on project conventions that are not visible
 * here - confirm and add handling. */
void av1_cdef_frame(YV12_BUFFER_CONFIG *frame, AV1_COMMON *cm,
                    MACROBLOCKD *xd) {
  const CdefInfo *const cdef_info = &cm->cdef_info;
  const int num_planes = av1_num_planes(cm);
  DECLARE_ALIGNED(16, uint16_t, src[CDEF_INBUF_SIZE]);
  uint16_t *linebuf[3];
  uint16_t *colbuf[3];
  cdef_list dlist[MI_SIZE_64X64 * MI_SIZE_64X64];
  unsigned char *row_cdef, *prev_row_cdef, *curr_row_cdef;
  int dir[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int var[CDEF_NBLOCKS][CDEF_NBLOCKS] = { { 0 } };
  int mi_wide_l2[3];
  int mi_high_l2[3];
  int xdec[3];
  int ydec[3];
  int coeff_shift = AOMMAX(cm->seq_params.bit_depth - 8, 0);
  /* Number of 64x64 filter blocks vertically / horizontally (rounded up). */
  const int nvfb = (cm->mi_rows + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  const int nhfb = (cm->mi_cols + MI_SIZE_64X64 - 1) / MI_SIZE_64X64;
  av1_setup_dst_planes(xd->plane, cm->seq_params.sb_size, frame, 0, 0, 0,
                       num_planes);

  /* Two rows of per-filter-block "was filtered" flags (previous and current
   * filter-block row). Each row has one extra entry on either side so that
   * indices fbc - 1 and fbc + 1 are always valid. Everything starts at 1. */
  row_cdef = aom_malloc(sizeof(*row_cdef) * (nhfb + 2) * 2);
  memset(row_cdef, 1, sizeof(*row_cdef) * (nhfb + 2) * 2);
  prev_row_cdef = row_cdef + 1;
  curr_row_cdef = prev_row_cdef + nhfb + 2;

  for (int pli = 0; pli < num_planes; pli++) {
    xdec[pli] = xd->plane[pli].subsampling_x;
    ydec[pli] = xd->plane[pli].subsampling_y;
    mi_wide_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_x;
    mi_high_l2[pli] = MI_SIZE_LOG2 - xd->plane[pli].subsampling_y;
  }

  /* Row pitch (in elements) used for every linebuf[] plane. */
  const int stride = (cm->mi_cols << MI_SIZE_LOG2) + 2 * CDEF_HBORDER;
  for (int pli = 0; pli < num_planes; pli++) {
    /* Size by the uint16_t element type. The previous sizeof(*linebuf) /
     * sizeof(*colbuf) measured a uint16_t pointer and over-allocated. */
    linebuf[pli] = aom_malloc(sizeof(*linebuf[pli]) * CDEF_VBORDER * stride);
    colbuf[pli] =
        aom_malloc(sizeof(*colbuf[pli]) *
                   ((CDEF_BLOCKSIZE << mi_high_l2[pli]) + 2 * CDEF_VBORDER) *
                   CDEF_HBORDER);
  }

  for (int fbr = 0; fbr < nvfb; fbr++) {
    /* Reset the saved left-column pixels at the start of every row. */
    for (int pli = 0; pli < num_planes; pli++) {
      const int block_height =
          (MI_SIZE_64X64 << mi_high_l2[pli]) + 2 * CDEF_VBORDER;
      fill_rect(colbuf[pli], CDEF_HBORDER, block_height, CDEF_HBORDER,
                CDEF_VERY_LARGE);
    }

    /* Non-zero when colbuf[] holds valid pixels for the block on the left. */
    int cdef_left = 1;
    for (int fbc = 0; fbc < nhfb; fbc++) {
      int level, sec_strength;
      int uv_level, uv_sec_strength;
      int cstart = 0;

      curr_row_cdef[fbc] = 0;

      /* Mode info at the top-left corner of this filter block. */
      const MB_MODE_INFO *const mbmi =
          cm->mi_grid_visible[MI_SIZE_64X64 * fbr * cm->mi_stride +
                              MI_SIZE_64X64 * fbc];
      if (mbmi == NULL || mbmi->cdef_strength == -1) {
        cdef_left = 0;
        continue;
      }

      /* Without saved left-column pixels, the left border is read straight
       * from the frame together with the block itself. */
      if (!cdef_left) cstart = -CDEF_HBORDER;

      /* Block size in mode-info units, clipped to the frame. */
      const int nhb = AOMMIN(MI_SIZE_64X64, cm->mi_cols - MI_SIZE_64X64 * fbc);
      const int nvb = AOMMIN(MI_SIZE_64X64, cm->mi_rows - MI_SIZE_64X64 * fbr);

      const int mi_row = MI_SIZE_64X64 * fbr;
      const int mi_col = MI_SIZE_64X64 * fbc;

      /* Flags telling which sides of this filter block lie on the frame
       * boundary. The last filter-block row/column is always on the
       * bottom/right boundary; other blocks are only if they end exactly at
       * the frame limit. */
      const int frame_top = (mi_row == 0) ? 1 : 0;
      const int frame_left = (mi_col == 0) ? 1 : 0;
      const int frame_bottom =
          (fbr != nvfb - 1) ? ((mi_row + MI_SIZE_64X64 == cm->mi_rows) ? 1 : 0)
                            : 1;
      const int frame_right =
          (fbc != nhfb - 1) ? ((mi_col + MI_SIZE_64X64 == cm->mi_cols) ? 1 : 0)
                            : 1;

      /* Split the packed luma and chroma strengths into a level (quotient)
       * and a secondary strength (remainder); a secondary strength of 3 is
       * mapped to 4. */
      const int mbmi_cdef_strength = mbmi->cdef_strength;
      level =
          cdef_info->cdef_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
      sec_strength =
          cdef_info->cdef_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
      sec_strength += sec_strength == 3;
      uv_level =
          cdef_info->cdef_uv_strengths[mbmi_cdef_strength] / CDEF_SEC_STRENGTHS;
      uv_sec_strength =
          cdef_info->cdef_uv_strengths[mbmi_cdef_strength] % CDEF_SEC_STRENGTHS;
      uv_sec_strength += uv_sec_strength == 3;

      /* Nothing to do when all strengths are zero... */
      if (level == 0 && sec_strength == 0 && uv_level == 0 &&
          uv_sec_strength == 0) {
        cdef_left = 0;
        continue;
      }
      /* ...or when the block list for this filter block is empty. */
      const int cdef_count = sb_compute_cdef_list(
          cm, fbr * MI_SIZE_64X64, fbc * MI_SIZE_64X64, dlist, BLOCK_64X64);
      if (cdef_count == 0) {
        cdef_left = 0;
        continue;
      }

      curr_row_cdef[fbc] = 1;
      for (int pli = 0; pli < num_planes; pli++) {
        uint8_t *const dst_buf = xd->plane[pli].dst.buf;
        const int dst_stride = xd->plane[pli].dst.stride;
        const int pri_damping = cdef_info->cdef_pri_damping;
        const int sec_damping = cdef_info->cdef_sec_damping;
        /* Block size in samples of this plane. */
        const int hsize = nhb << mi_wide_l2[pli];
        const int vsize = nvb << mi_high_l2[pli];
        /* Position of the block's top-left sample within the plane. */
        const int roffset = (MI_SIZE_64X64 << mi_high_l2[pli]) * fbr;
        const int coffset = fbc * MI_SIZE_64X64 << mi_wide_l2[pli];
        /* Extent copied from the frame: the block plus, unless this is the
         * last filter-block column/row, the right/bottom border. */
        const int cend = (fbc == nhfb - 1) ? hsize : hsize + CDEF_HBORDER;
        const int rend = (fbr == nvfb - 1) ? vsize : vsize + CDEF_VBORDER;

        if (pli) {
          level = uv_level;
          sec_strength = uv_sec_strength;
        }

        if (fbc == nhfb - 1) {
          /* On the last superblock column, fill in the right border with
             CDEF_VERY_LARGE to avoid filtering with the outside. */
          fill_rect(&src[cend + CDEF_HBORDER], CDEF_BSTRIDE,
                    rend + CDEF_VBORDER, hsize + CDEF_HBORDER - cend,
                    CDEF_VERY_LARGE);
        }
        if (fbr == nvfb - 1) {
          /* On the last superblock row, fill in the bottom border with
             CDEF_VERY_LARGE to avoid filtering with the outside. */
          fill_rect(&src[(rend + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
                    CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
        }

        /* Copy in the pixels we need from the current superblock for
           deringing. */
        copy_sb8_16(cm,
                    &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER + cstart],
                    CDEF_BSTRIDE, dst_buf, roffset, coffset + cstart,
                    dst_stride, rend, cend - cstart);

        /* Top border: from the frame if the block above was not filtered,
         * else from the saved line buffer, else (first row) filled. */
        if (!prev_row_cdef[fbc]) {
          copy_sb8_16(cm, &src[CDEF_HBORDER], CDEF_BSTRIDE, dst_buf,
                      roffset - CDEF_VBORDER, coffset, dst_stride,
                      CDEF_VBORDER, hsize);
        } else if (fbr > 0) {
          copy_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, &linebuf[pli][coffset],
                    stride, CDEF_VBORDER, hsize);
        } else {
          fill_rect(&src[CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER, hsize,
                    CDEF_VERY_LARGE);
        }

        /* Top-left corner, same three-way choice based on the block above
         * and to the left. */
        if (!prev_row_cdef[fbc - 1]) {
          copy_sb8_16(cm, src, CDEF_BSTRIDE, dst_buf, roffset - CDEF_VBORDER,
                      coffset - CDEF_HBORDER, dst_stride, CDEF_VBORDER,
                      CDEF_HBORDER);
        } else if (fbr > 0 && fbc > 0) {
          copy_rect(src, CDEF_BSTRIDE, &linebuf[pli][coffset - CDEF_HBORDER],
                    stride, CDEF_VBORDER, CDEF_HBORDER);
        } else {
          fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, CDEF_HBORDER,
                    CDEF_VERY_LARGE);
        }

        /* Top-right corner, based on the block above and to the right. */
        if (!prev_row_cdef[fbc + 1]) {
          copy_sb8_16(cm, &src[CDEF_HBORDER + hsize], CDEF_BSTRIDE, dst_buf,
                      roffset - CDEF_VBORDER, coffset + hsize, dst_stride,
                      CDEF_VBORDER, CDEF_HBORDER);
        } else if (fbr > 0 && fbc < nhfb - 1) {
          copy_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
                    &linebuf[pli][coffset + hsize], stride, CDEF_VBORDER,
                    CDEF_HBORDER);
        } else {
          fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE, CDEF_VBORDER,
                    CDEF_HBORDER, CDEF_VERY_LARGE);
        }

        if (cdef_left) {
          /* If we deringed the superblock on the left then we need to copy in
             saved pixels. */
          copy_rect(src, CDEF_BSTRIDE, colbuf[pli], CDEF_HBORDER,
                    rend + CDEF_VBORDER, CDEF_HBORDER);
        }
        /* Saving pixels in case we need to dering the superblock on the
           right. */
        copy_rect(colbuf[pli], CDEF_HBORDER, src + hsize, CDEF_BSTRIDE,
                  rend + CDEF_VBORDER, CDEF_HBORDER);
        /* Save the rows just above the next filter-block row before this
         * block is filtered, for use by the row below. */
        copy_sb8_16(
            cm, &linebuf[pli][coffset], stride, dst_buf,
            (MI_SIZE_64X64 << mi_high_l2[pli]) * (fbr + 1) - CDEF_VBORDER,
            coffset, dst_stride, CDEF_VBORDER, hsize);

        /* Any border that lies outside the frame is overwritten with
         * CDEF_VERY_LARGE, whatever was copied there above. */
        if (frame_top) {
          fill_rect(src, CDEF_BSTRIDE, CDEF_VBORDER, hsize + 2 * CDEF_HBORDER,
                    CDEF_VERY_LARGE);
        }
        if (frame_left) {
          fill_rect(src, CDEF_BSTRIDE, vsize + 2 * CDEF_VBORDER, CDEF_HBORDER,
                    CDEF_VERY_LARGE);
        }
        if (frame_bottom) {
          fill_rect(&src[(vsize + CDEF_VBORDER) * CDEF_BSTRIDE], CDEF_BSTRIDE,
                    CDEF_VBORDER, hsize + 2 * CDEF_HBORDER, CDEF_VERY_LARGE);
        }
        if (frame_right) {
          fill_rect(&src[hsize + CDEF_HBORDER], CDEF_BSTRIDE,
                    vsize + 2 * CDEF_VBORDER, CDEF_HBORDER, CDEF_VERY_LARGE);
        }

        /* Filter from the padded copy back into the frame. Exactly one of
         * the two destination pointers is non-NULL: the second for
         * high-bitdepth buffers, the first otherwise. */
        if (cm->seq_params.use_highbitdepth) {
          cdef_filter_fb(
              NULL,
              &CONVERT_TO_SHORTPTR(dst_buf)[dst_stride * roffset + coffset],
              dst_stride, &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER],
              xdec[pli], ydec[pli], dir, NULL, var, pli, dlist, cdef_count,
              level, sec_strength, pri_damping, sec_damping, coeff_shift);
        } else {
          cdef_filter_fb(
              &dst_buf[dst_stride * roffset + coffset], NULL, dst_stride,
              &src[CDEF_VBORDER * CDEF_BSTRIDE + CDEF_HBORDER], xdec[pli],
              ydec[pli], dir, NULL, var, pli, dlist, cdef_count, level,
              sec_strength, pri_damping, sec_damping, coeff_shift);
        }
      }
      cdef_left = 1;
    }

    /* The flags of the row just processed become the "previous row". */
    {
      unsigned char *tmp = prev_row_cdef;
      prev_row_cdef = curr_row_cdef;
      curr_row_cdef = tmp;
    }
  }

  aom_free(row_cdef);
  for (int pli = 0; pli < num_planes; pli++) {
    aom_free(linebuf[pli]);
    aom_free(colbuf[pli]);
  }
}
406