1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
12 /* MFQE: Multiframe Quality Enhancement
13  * In rate limited situations keyframes may cause significant visual artifacts
 * commonly referred to as "popping." This file implements a postprocessing
 * algorithm which blends data from the preceding frame when there is no
16  * motion and the q from the previous frame is lower which indicates that it is
17  * higher quality.
18  */
19 
20 #include "postproc.h"
21 #include "variance.h"
22 #include "vpx_mem/vpx_mem.h"
23 #include "vp8_rtcd.h"
24 #include "vpx_scale/yv12config.h"
25 
26 #include <limits.h>
27 #include <stdlib.h>
28 
filter_by_weight(unsigned char * src,int src_stride,unsigned char * dst,int dst_stride,int block_size,int src_weight)29 static void filter_by_weight(unsigned char *src, int src_stride,
30                              unsigned char *dst, int dst_stride,
31                              int block_size, int src_weight)
32 {
33     int dst_weight = (1 << MFQE_PRECISION) - src_weight;
34     int rounding_bit = 1 << (MFQE_PRECISION - 1);
35     int r, c;
36 
37     for (r = 0; r < block_size; r++)
38     {
39         for (c = 0; c < block_size; c++)
40         {
41             dst[c] = (src[c] * src_weight +
42                       dst[c] * dst_weight +
43                       rounding_bit) >> MFQE_PRECISION;
44         }
45         src += src_stride;
46         dst += dst_stride;
47     }
48 }
49 
/* C reference for the RTCD 16x16 weighted-blend entry point. */
void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
                                 unsigned char *dst, int dst_stride,
                                 int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}
56 
/* C reference for the RTCD 8x8 weighted-blend entry point. */
void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}
63 
/* C reference for the RTCD 4x4 weighted-blend entry point. */
void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight)
{
    filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
}
70 
/* Blend one macroblock (block_size == 16) or one 8x8 quadrant
 * (block_size == 8) of src into dst across all three planes, giving the
 * source pixels a weight of src_weight out of (1 << MFQE_PRECISION).
 * Chroma blocks are half the luma size in each dimension.
 */
static void apply_ifactor(unsigned char *y_src,
                          int y_src_stride,
                          unsigned char *y_dst,
                          int y_dst_stride,
                          unsigned char *u_src,
                          unsigned char *v_src,
                          int uv_src_stride,
                          unsigned char *u_dst,
                          unsigned char *v_dst,
                          int uv_dst_stride,
                          int block_size,
                          int src_weight)
{
    if (block_size != 16)
    {
        /* 8x8 luma quadrant with 4x4 chroma */
        vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
        vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
        vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
    }
    else
    {
        /* full 16x16 luma with 8x8 chroma */
        vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride, src_weight);
        vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride, src_weight);
        vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride, src_weight);
    }
}
97 
/* Integer square root of x, rounded to the nearest integer.
 *
 * Digit-by-digit method: test each candidate bit of the root from high
 * to low, keeping it when the trial root squared still fits in x, then
 * round up when x >= guess^2 + guess + 1 (i.e. x is closer to
 * (guess+1)^2 than to guess^2).
 *
 * Fix: the old comparisons (x < guess*guess and guess*guess+guess+1 <= x)
 * were computed in 32-bit unsigned arithmetic and wrapped for inputs
 * x >= 2^31 (any trial root >= 2^16 squared overflows), yielding wildly
 * wrong results. The products are now widened to 64 bits; results for
 * x < 2^31 are unchanged.
 */
static unsigned int int_sqrt(unsigned int x)
{
    unsigned int y = x;
    unsigned int guess = 0;
    int p = 1;

    /* p = half the bit length of x: position of the root's top bit. */
    while (y >>= 1) p++;
    p >>= 1;

    while (p >= 0)
    {
        const unsigned int trial = guess | (1u << p);
        /* widen to 64 bits so trial*trial cannot wrap */
        if ((unsigned long long)trial * trial <= x)
            guess = trial;
        p--;
    }

    /* choose between guess or guess+1 */
    return guess + ((unsigned long long)guess * guess + guess + 1 <= x);
}
117 
118 #define USE_SSD
/* Blend (or copy) one block of the shown frame into the post-proc buffer.
 *
 * blksize     : luma block size, 16 (whole MB) or 8 (one quadrant);
 *               chroma blocks are half that in each dimension.
 * qcurr,qprev : base q index of the current and previous frames.
 * y/u/v       : source planes (frame being shown), with their strides.
 * yd/ud/vd    : destination planes (post-proc buffer; per the file header
 *               this holds the preceding, higher-quality frame on entry),
 *               with their strides.
 *
 * When source and destination differ by less than a threshold derived
 * from the q delta and the block activity, the two blocks are blended
 * with a weight proportional to the difference; otherwise the source
 * block is copied to the destination unchanged.
 */
static void multiframe_quality_enhance_block
(
    int blksize, /* Currently only values supported are 16, 8 */
    int qcurr,
    int qprev,
    unsigned char *y,
    unsigned char *u,
    unsigned char *v,
    int y_stride,
    int uv_stride,
    unsigned char *yd,
    unsigned char *ud,
    unsigned char *vd,
    int yd_stride,
    int uvd_stride
)
{
    static const unsigned char VP8_ZEROS[16]=
    {
         0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
    };
    int uvblksize = blksize >> 1;
    int qdiff = qcurr - qprev;

    int i;
    unsigned char *up;
    unsigned char *udp;
    unsigned char *vp;
    unsigned char *vdp;

    unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;

    if (blksize == 16)
    {
        /* "activity" = per-pixel second moment: variance against an
         * all-zero block (stride 0), rounded on the shift. */
        actd = (vp8_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse)+128)>>8;
        act = (vp8_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse)+128)>>8;
#ifdef USE_SSD
        /* With USE_SSD the variance return value is discarded; "sad"
         * actually holds the per-pixel SSE between the two blocks. */
        sad = (vp8_variance16x16(y, y_stride, yd, yd_stride, &sse));
        sad = (sse + 128)>>8;
        usad = (vp8_variance8x8(u, uv_stride, ud, uvd_stride, &sse));
        usad = (sse + 32)>>6;
        vsad = (vp8_variance8x8(v, uv_stride, vd, uvd_stride, &sse));
        vsad = (sse + 32)>>6;
#else
        sad = (vp8_sad16x16(y, y_stride, yd, yd_stride, UINT_MAX) + 128) >> 8;
        usad = (vp8_sad8x8(u, uv_stride, ud, uvd_stride, UINT_MAX) + 32) >> 6;
        vsad = (vp8_sad8x8(v, uv_stride, vd, uvd_stride, UINT_MAX)+ 32) >> 6;
#endif
    }
    else /* if (blksize == 8) */
    {
        actd = (vp8_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse)+32)>>6;
        act = (vp8_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse)+32)>>6;
#ifdef USE_SSD
        sad = (vp8_variance8x8(y, y_stride, yd, yd_stride, &sse));
        sad = (sse + 32)>>6;
        usad = (vp8_variance4x4(u, uv_stride, ud, uvd_stride, &sse));
        usad = (sse + 8)>>4;
        vsad = (vp8_variance4x4(v, uv_stride, vd, uvd_stride, &sse));
        vsad = (sse + 8)>>4;
#else
        sad = (vp8_sad8x8(y, y_stride, yd, yd_stride, UINT_MAX) + 32) >> 6;
        usad = (vp8_sad4x4(u, uv_stride, ud, uvd_stride, UINT_MAX) + 8) >> 4;
        vsad = (vp8_sad4x4(v, uv_stride, vd, uvd_stride, UINT_MAX) + 8) >> 4;
#endif
    }

    /* skip blending when the destination block is much "busier" than the
     * source -- guards against adding high-frequency noise */
    actrisk = (actd > act * 5);

    /* thr = qdiff/16 + log2(act) + log4(qprev)
     * NOTE(review): the formula says log2(act) but the code folds in
     * log2(actd) -- confirm which is intended. */
    thr = (qdiff >> 4);
    while (actd >>= 1) thr++;
    while (qprev >>= 2) thr++;

#ifdef USE_SSD
    /* SSE-based metrics compare against thr squared */
    thrsq = thr * thr;
    if (sad < thrsq &&
        /* additional checks for color mismatch and excessive addition of
         * high-frequencies */
        4 * usad < thrsq && 4 * vsad < thrsq && !actrisk)
#else
    if (sad < thr &&
        /* additional checks for color mismatch and excessive addition of
         * high-frequencies */
        2 * usad < thr && 2 * vsad < thr && !actrisk)
#endif
    {
        int ifactor;
#ifdef USE_SSD
        /* TODO: optimize this later to not need sqr root */
        sad = int_sqrt(sad);
#endif
        /* blend weight grows with the difference and is halved per
         * 32 steps of q increase */
        ifactor = (sad << MFQE_PRECISION) / thr;
        ifactor >>= (qdiff >> 5);

        if (ifactor)
        {
            apply_ifactor(y, y_stride, yd, yd_stride,
                          u, v, uv_stride,
                          ud, vd, uvd_stride,
                          blksize, ifactor);
        }
    }
    else  /* else implicitly copy from previous frame */
    {
        if (blksize == 16)
        {
            vp8_copy_mem16x16(y, y_stride, yd, yd_stride);
            vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride);
            vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride);
        }
        else  /* if (blksize == 8) */
        {
            vp8_copy_mem8x8(y, y_stride, yd, yd_stride);
            /* no 4x4 copy helper: copy the chroma rows by hand */
            for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride)
                vpx_memcpy(udp, up, uvblksize);
            for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride)
                vpx_memcpy(vdp, vp, uvblksize);
        }
    }
}
240 
qualify_inter_mb(const MODE_INFO * mode_info_context,int * map)241 static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map)
242 {
243     if (mode_info_context->mbmi.mb_skip_coeff)
244         map[0] = map[1] = map[2] = map[3] = 1;
245     else if (mode_info_context->mbmi.mode==SPLITMV)
246     {
247         static int ndx[4][4] =
248         {
249             {0, 1, 4, 5},
250             {2, 3, 6, 7},
251             {8, 9, 12, 13},
252             {10, 11, 14, 15}
253         };
254         int i, j;
255         for (i=0; i<4; ++i)
256         {
257             map[i] = 1;
258             for (j=0; j<4 && map[j]; ++j)
259                 map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 &&
260                            mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2);
261         }
262     }
263     else
264     {
265         map[0] = map[1] = map[2] = map[3] =
266             (mode_info_context->mbmi.mode > B_PRED &&
267              abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 &&
268              abs(mode_info_context->mbmi.mv.as_mv.col) <= 2);
269     }
270     return (map[0]+map[1]+map[2]+map[3]);
271 }
272 
/* Top-level MFQE pass.
 *
 * Walks every macroblock of cm->frame_to_show and either blends it with,
 * or copies it over, the co-located macroblock of cm->post_proc_buffer
 * (which, per the file header, holds the preceding frame's data on
 * entry and receives the enhanced result).
 */
void vp8_multiframe_quality_enhance
(
    VP8_COMMON *cm
)
{
    YV12_BUFFER_CONFIG *show = cm->frame_to_show;
    YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer;

    FRAME_TYPE frame_type = cm->frame_type;
    /* Point at base of Mb MODE_INFO list has motion vectors etc */
    const MODE_INFO *mode_info_context = cm->show_frame_mi;
    int mb_row;
    int mb_col;
    int totmap, map[4];      /* per-quadrant qualification flags */
    int qcurr = cm->base_qindex;
    int qprev = cm->postproc_state.last_base_qindex;

    unsigned char *y_ptr, *u_ptr, *v_ptr;    /* source plane cursors */
    unsigned char *yd_ptr, *ud_ptr, *vd_ptr; /* destination plane cursors */

    /* Set up the buffer pointers */
    y_ptr = show->y_buffer;
    u_ptr = show->u_buffer;
    v_ptr = show->v_buffer;
    yd_ptr = dest->y_buffer;
    ud_ptr = dest->u_buffer;
    vd_ptr = dest->v_buffer;

    /* postprocess each macro block */
    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
    {
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
        {
            /* if motion is high there will likely be no benefit */
            if (frame_type == INTER_FRAME) totmap = qualify_inter_mb(mode_info_context, map);
            else totmap = (frame_type == KEY_FRAME ? 4 : 0);
            if (totmap)
            {
                if (totmap < 4)
                {
                    /* only some quadrants qualify: enhance those,
                     * plain-copy the rest */
                    int i, j;
                    for (i=0; i<2; ++i)
                        for (j=0; j<2; ++j)
                        {
                            if (map[i*2+j])
                            {
                                /* 8x8 luma / 4x4 chroma offsets into
                                 * quadrant (i, j) */
                                multiframe_quality_enhance_block(8, qcurr, qprev,
                                                                 y_ptr + 8*(i*show->y_stride+j),
                                                                 u_ptr + 4*(i*show->uv_stride+j),
                                                                 v_ptr + 4*(i*show->uv_stride+j),
                                                                 show->y_stride,
                                                                 show->uv_stride,
                                                                 yd_ptr + 8*(i*dest->y_stride+j),
                                                                 ud_ptr + 4*(i*dest->uv_stride+j),
                                                                 vd_ptr + 4*(i*dest->uv_stride+j),
                                                                 dest->y_stride,
                                                                 dest->uv_stride);
                            }
                            else
                            {
                                /* copy a 8x8 block */
                                int k;
                                unsigned char *up = u_ptr + 4*(i*show->uv_stride+j);
                                unsigned char *udp = ud_ptr + 4*(i*dest->uv_stride+j);
                                unsigned char *vp = v_ptr + 4*(i*show->uv_stride+j);
                                unsigned char *vdp = vd_ptr + 4*(i*dest->uv_stride+j);
                                vp8_copy_mem8x8(y_ptr + 8*(i*show->y_stride+j), show->y_stride,
                                                yd_ptr + 8*(i*dest->y_stride+j), dest->y_stride);
                                /* 4x4 chroma has no copy helper: row by row */
                                for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride,
                                                        vp += show->uv_stride, vdp += dest->uv_stride)
                                {
                                    vpx_memcpy(udp, up, 4);
                                    vpx_memcpy(vdp, vp, 4);
                                }
                            }
                        }
                }
                else /* totmap = 4 : whole MB qualifies */
                {
                    multiframe_quality_enhance_block(16, qcurr, qprev, y_ptr,
                                                     u_ptr, v_ptr,
                                                     show->y_stride,
                                                     show->uv_stride,
                                                     yd_ptr, ud_ptr, vd_ptr,
                                                     dest->y_stride,
                                                     dest->uv_stride);
                }
            }
            else
            {
                /* nothing qualifies: copy the MB through unchanged */
                vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride);
                vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride);
                vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride);
            }
            /* advance one macroblock to the right */
            y_ptr += 16;
            u_ptr += 8;
            v_ptr += 8;
            yd_ptr += 16;
            ud_ptr += 8;
            vd_ptr += 8;
            mode_info_context++;     /* step to next MB */
        }

        /* rewind to the start of the row, then down one MB row */
        y_ptr += show->y_stride  * 16 - 16 * cm->mb_cols;
        u_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
        v_ptr += show->uv_stride *  8 - 8 * cm->mb_cols;
        yd_ptr += dest->y_stride  * 16 - 16 * cm->mb_cols;
        ud_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;
        vd_ptr += dest->uv_stride *  8 - 8 * cm->mb_cols;

        mode_info_context++;         /* Skip border mb */
    }
}
386