1 /*
2  *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include <limits.h>
12 
13 #include "denoising.h"
14 
15 #include "vp8/common/reconinter.h"
16 #include "vpx/vpx_integer.h"
17 #include "vpx_mem/vpx_mem.h"
18 #include "vp8_rtcd.h"
19 
/* Bound on the squared motion-vector magnitude (mv_row^2 + mv_col^2).
 * At or below it, the zero-MV candidate is given a bias via
 * SSE_DIFF_THRESHOLD; it is also scaled by the per-mode motion parameters to
 * decide whether a block is filtered at all.
 */
static const unsigned int NOISE_MOTION_THRESHOLD = 25u * 25u;

/* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming
 * var(noise) ~= 100. It is the largest amount by which the (biased) zero-MV
 * SSE may exceed the best SSE while zero motion is still preferred for a
 * block with small motion.
 */
static const unsigned int SSE_DIFF_THRESHOLD = 16u * 16u * 20u;

/* Base limit on the best SSE of a 16x16 block above which the block is
 * copied instead of filtered (scaled by scale_sse_thresh at the use site).
 */
static const unsigned int SSE_THRESHOLD = 16u * 16u * 40u;

/* Replacement for SSE_THRESHOLD on blocks flagged for increased denoising. */
static const unsigned int SSE_THRESHOLD_HIGH = 16u * 16u * 60u;
27 
28 /*
29  * The filter function was modified to reduce the computational complexity.
30  * Step 1:
31  * Instead of applying tap coefficients for each pixel, we calculated the
32  * pixel adjustments vs. pixel diff value ahead of time.
33  *     adjustment = filtered_value - current_raw
34  *                = (filter_coefficient * diff + 128) >> 8
35  * where
36  *     filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3));
37  *     filter_coefficient += filter_coefficient /
38  *                           (3 + motion_magnitude_adjustment);
39  *     filter_coefficient is clamped to 0 ~ 255.
40  *
41  * Step 2:
42  * The adjustment vs. diff curve becomes flat very quickly when diff increases.
43  * This allowed us to use only several levels to approximate the curve without
44  * changing the filtering algorithm too much.
45  * The adjustments were further corrected by checking the motion magnitude.
46  * The levels used are:
47  * diff       adjustment w/o motion correction   adjustment w/ motion correction
48  * [-255, -16]           -6                                   -7
49  * [-15, -8]             -4                                   -5
50  * [-7, -4]              -3                                   -4
51  * [-3, 3]               diff                                 diff
52  * [4, 7]                 3                                    4
53  * [8, 15]                4                                    5
54  * [16, 255]              6                                    7
55  */
56 
vp8_denoiser_filter_c(unsigned char * mc_running_avg_y,int mc_avg_y_stride,unsigned char * running_avg_y,int avg_y_stride,unsigned char * sig,int sig_stride,unsigned int motion_magnitude,int increase_denoising)57 int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride,
58                           unsigned char *running_avg_y, int avg_y_stride,
59                           unsigned char *sig, int sig_stride,
60                           unsigned int motion_magnitude,
61                           int increase_denoising)
62 {
63     unsigned char *running_avg_y_start = running_avg_y;
64     unsigned char *sig_start = sig;
65     int sum_diff_thresh;
66     int r, c;
67     int sum_diff = 0;
68     int adj_val[3] = {3, 4, 6};
69     int shift_inc1 = 0;
70     int shift_inc2 = 1;
71     int col_sum[16] = {0, 0, 0, 0,
72                        0, 0, 0, 0,
73                        0, 0, 0, 0,
74                        0, 0, 0, 0};
75     /* If motion_magnitude is small, making the denoiser more aggressive by
76      * increasing the adjustment for each level. Add another increment for
77      * blocks that are labeled for increase denoising. */
78     if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD)
79     {
80       if (increase_denoising) {
81         shift_inc1 = 1;
82         shift_inc2 = 2;
83       }
84       adj_val[0] += shift_inc2;
85       adj_val[1] += shift_inc2;
86       adj_val[2] += shift_inc2;
87     }
88 
89     for (r = 0; r < 16; ++r)
90     {
91         for (c = 0; c < 16; ++c)
92         {
93             int diff = 0;
94             int adjustment = 0;
95             int absdiff = 0;
96 
97             diff = mc_running_avg_y[c] - sig[c];
98             absdiff = abs(diff);
99 
100             // When |diff| <= |3 + shift_inc1|, use pixel value from
101             // last denoised raw.
102             if (absdiff <= 3 + shift_inc1)
103             {
104                 running_avg_y[c] = mc_running_avg_y[c];
105                 col_sum[c] += diff;
106             }
107             else
108             {
109                 if (absdiff >= 4 + shift_inc1 && absdiff <= 7)
110                     adjustment = adj_val[0];
111                 else if (absdiff >= 8 && absdiff <= 15)
112                     adjustment = adj_val[1];
113                 else
114                     adjustment = adj_val[2];
115 
116                 if (diff > 0)
117                 {
118                     if ((sig[c] + adjustment) > 255)
119                         running_avg_y[c] = 255;
120                     else
121                         running_avg_y[c] = sig[c] + adjustment;
122 
123                     col_sum[c] += adjustment;
124                 }
125                 else
126                 {
127                     if ((sig[c] - adjustment) < 0)
128                         running_avg_y[c] = 0;
129                     else
130                         running_avg_y[c] = sig[c] - adjustment;
131 
132                     col_sum[c] -= adjustment;
133                 }
134             }
135         }
136 
137         /* Update pointers for next iteration. */
138         sig += sig_stride;
139         mc_running_avg_y += mc_avg_y_stride;
140         running_avg_y += avg_y_stride;
141     }
142 
143     for (c = 0; c < 16; ++c) {
144       // Below we clip the value in the same way which SSE code use.
145       // When adopting aggressive denoiser, the adj_val for each pixel
146       // could be at most 8 (this is current max adjustment of the map).
147       // In SSE code, we calculate the sum of adj_val for
148       // the columns, so the sum could be upto 128(16 rows). However,
149       // the range of the value is -128 ~ 127 in SSE code, that's why
150       // we do this change in C code.
151       // We don't do this for UV denoiser, since there are only 8 rows,
152       // and max adjustments <= 8, so the sum of the columns will not
153       // exceed 64.
154       if (col_sum[c] >= 128) {
155         col_sum[c] = 127;
156       }
157       sum_diff += col_sum[c];
158     }
159 
160     sum_diff_thresh= SUM_DIFF_THRESHOLD;
161     if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
162     if (abs(sum_diff) > sum_diff_thresh) {
163       // Before returning to copy the block (i.e., apply no denoising), check
164       // if we can still apply some (weaker) temporal filtering to this block,
165       // that would otherwise not be denoised at all. Simplest is to apply
166       // an additional adjustment to running_avg_y to bring it closer to sig.
167       // The adjustment is capped by a maximum delta, and chosen such that
168       // in most cases the resulting sum_diff will be within the
169       // accceptable range given by sum_diff_thresh.
170 
171       // The delta is set by the excess of absolute pixel diff over threshold.
172       int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
173       // Only apply the adjustment for max delta up to 3.
174       if (delta < 4) {
175         sig -= sig_stride * 16;
176         mc_running_avg_y -= mc_avg_y_stride * 16;
177         running_avg_y -= avg_y_stride * 16;
178         for (r = 0; r < 16; ++r) {
179           for (c = 0; c < 16; ++c) {
180             int diff = mc_running_avg_y[c] - sig[c];
181             int adjustment = abs(diff);
182             if (adjustment > delta)
183               adjustment = delta;
184             if (diff > 0) {
185               // Bring denoised signal down.
186               if (running_avg_y[c] - adjustment < 0)
187                 running_avg_y[c] = 0;
188               else
189                 running_avg_y[c] = running_avg_y[c] - adjustment;
190               col_sum[c] -= adjustment;
191             } else if (diff < 0) {
192               // Bring denoised signal up.
193               if (running_avg_y[c] + adjustment > 255)
194                 running_avg_y[c] = 255;
195               else
196                 running_avg_y[c] = running_avg_y[c] + adjustment;
197               col_sum[c] += adjustment;
198             }
199           }
200           // TODO(marpan): Check here if abs(sum_diff) has gone below the
201           // threshold sum_diff_thresh, and if so, we can exit the row loop.
202           sig += sig_stride;
203           mc_running_avg_y += mc_avg_y_stride;
204           running_avg_y += avg_y_stride;
205         }
206 
207         sum_diff = 0;
208         for (c = 0; c < 16; ++c) {
209           if (col_sum[c] >= 128) {
210             col_sum[c] = 127;
211           }
212           sum_diff += col_sum[c];
213         }
214 
215         if (abs(sum_diff) > sum_diff_thresh)
216           return COPY_BLOCK;
217       } else {
218         return COPY_BLOCK;
219       }
220     }
221 
222     vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride);
223     return FILTER_BLOCK;
224 }
225 
vp8_denoiser_filter_uv_c(unsigned char * mc_running_avg_uv,int mc_avg_uv_stride,unsigned char * running_avg_uv,int avg_uv_stride,unsigned char * sig,int sig_stride,unsigned int motion_magnitude,int increase_denoising)226 int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg_uv,
227                              int mc_avg_uv_stride,
228                              unsigned char *running_avg_uv,
229                              int avg_uv_stride,
230                              unsigned char *sig,
231                              int sig_stride,
232                              unsigned int motion_magnitude,
233                              int increase_denoising) {
234     unsigned char *running_avg_uv_start = running_avg_uv;
235     unsigned char *sig_start = sig;
236     int sum_diff_thresh;
237     int r, c;
238     int sum_diff = 0;
239     int sum_block = 0;
240     int adj_val[3] = {3, 4, 6};
241     int shift_inc1 = 0;
242     int shift_inc2 = 1;
243     /* If motion_magnitude is small, making the denoiser more aggressive by
244      * increasing the adjustment for each level. Add another increment for
245      * blocks that are labeled for increase denoising. */
246     if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) {
247       if (increase_denoising) {
248         shift_inc1 = 1;
249         shift_inc2 = 2;
250       }
251       adj_val[0] += shift_inc2;
252       adj_val[1] += shift_inc2;
253       adj_val[2] += shift_inc2;
254     }
255 
256     // Avoid denoising color signal if its close to average level.
257     for (r = 0; r < 8; ++r) {
258       for (c = 0; c < 8; ++c) {
259         sum_block += sig[c];
260       }
261       sig += sig_stride;
262     }
263     if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) {
264       return COPY_BLOCK;
265     }
266 
267     sig -= sig_stride * 8;
268     for (r = 0; r < 8; ++r) {
269       for (c = 0; c < 8; ++c) {
270         int diff = 0;
271         int adjustment = 0;
272         int absdiff = 0;
273 
274         diff = mc_running_avg_uv[c] - sig[c];
275         absdiff = abs(diff);
276 
277         // When |diff| <= |3 + shift_inc1|, use pixel value from
278         // last denoised raw.
279         if (absdiff <= 3 + shift_inc1) {
280           running_avg_uv[c] = mc_running_avg_uv[c];
281           sum_diff += diff;
282         } else {
283           if (absdiff >= 4 && absdiff <= 7)
284             adjustment = adj_val[0];
285           else if (absdiff >= 8 && absdiff <= 15)
286             adjustment = adj_val[1];
287           else
288             adjustment = adj_val[2];
289           if (diff > 0) {
290             if ((sig[c] + adjustment) > 255)
291               running_avg_uv[c] = 255;
292             else
293               running_avg_uv[c] = sig[c] + adjustment;
294             sum_diff += adjustment;
295           } else {
296             if ((sig[c] - adjustment) < 0)
297               running_avg_uv[c] = 0;
298             else
299               running_avg_uv[c] = sig[c] - adjustment;
300             sum_diff -= adjustment;
301           }
302         }
303       }
304       /* Update pointers for next iteration. */
305       sig += sig_stride;
306       mc_running_avg_uv += mc_avg_uv_stride;
307       running_avg_uv += avg_uv_stride;
308     }
309 
310     sum_diff_thresh= SUM_DIFF_THRESHOLD_UV;
311     if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV;
312     if (abs(sum_diff) > sum_diff_thresh) {
313       // Before returning to copy the block (i.e., apply no denoising), check
314       // if we can still apply some (weaker) temporal filtering to this block,
315       // that would otherwise not be denoised at all. Simplest is to apply
316       // an additional adjustment to running_avg_y to bring it closer to sig.
317       // The adjustment is capped by a maximum delta, and chosen such that
318       // in most cases the resulting sum_diff will be within the
319       // accceptable range given by sum_diff_thresh.
320 
321       // The delta is set by the excess of absolute pixel diff over threshold.
322       int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
323       // Only apply the adjustment for max delta up to 3.
324       if (delta < 4) {
325         sig -= sig_stride * 8;
326         mc_running_avg_uv -= mc_avg_uv_stride * 8;
327         running_avg_uv -= avg_uv_stride * 8;
328         for (r = 0; r < 8; ++r) {
329           for (c = 0; c < 8; ++c) {
330             int diff = mc_running_avg_uv[c] - sig[c];
331             int adjustment = abs(diff);
332             if (adjustment > delta)
333               adjustment = delta;
334             if (diff > 0) {
335               // Bring denoised signal down.
336               if (running_avg_uv[c] - adjustment < 0)
337                 running_avg_uv[c] = 0;
338               else
339                 running_avg_uv[c] = running_avg_uv[c] - adjustment;
340               sum_diff -= adjustment;
341             } else if (diff < 0) {
342               // Bring denoised signal up.
343               if (running_avg_uv[c] + adjustment > 255)
344                 running_avg_uv[c] = 255;
345               else
346                 running_avg_uv[c] = running_avg_uv[c] + adjustment;
347               sum_diff += adjustment;
348             }
349           }
350           // TODO(marpan): Check here if abs(sum_diff) has gone below the
351           // threshold sum_diff_thresh, and if so, we can exit the row loop.
352           sig += sig_stride;
353           mc_running_avg_uv += mc_avg_uv_stride;
354           running_avg_uv += avg_uv_stride;
355         }
356         if (abs(sum_diff) > sum_diff_thresh)
357           return COPY_BLOCK;
358       } else {
359         return COPY_BLOCK;
360       }
361     }
362 
363     vp8_copy_mem8x8(running_avg_uv_start, avg_uv_stride, sig_start,
364                     sig_stride);
365     return FILTER_BLOCK;
366 }
367 
/*
 * Select the denoiser mode from the numeric `mode` and load the matching
 * parameter preset into denoiser->denoise_pars.
 *
 *   mode 1 -> kDenoiserOnYOnly
 *   mode 2 -> kDenoiserOnYUV
 *   mode 3 -> kDenoiserOnYUVAggressive
 *   other  -> kDenoiserOnYUV
 *
 * Every mode except kDenoiserOnYUVAggressive uses the normal preset.
 */
void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) {
  assert(mode > 0);  /* Denoiser is allocated only if mode > 0. */

  switch (mode) {
    case 1:
      denoiser->denoiser_mode = kDenoiserOnYOnly;
      break;
    case 3:
      denoiser->denoiser_mode = kDenoiserOnYUVAggressive;
      break;
    case 2:
    default:
      denoiser->denoiser_mode = kDenoiserOnYUV;
      break;
  }

  /* Both presets leave spatial blur off. */
  denoiser->denoise_pars.spatial_blur = 0;

  if (denoiser->denoiser_mode == kDenoiserOnYUVAggressive) {
    denoiser->denoise_pars.scale_sse_thresh = 2;
    denoiser->denoise_pars.scale_motion_thresh = 16;
    denoiser->denoise_pars.scale_increase_filter = 1;
    denoiser->denoise_pars.denoise_mv_bias = 60;
    denoiser->denoise_pars.pickmode_mv_bias = 75;
    denoiser->denoise_pars.qp_thresh = 80;
    denoiser->denoise_pars.consec_zerolast = 15;
  } else {
    denoiser->denoise_pars.scale_sse_thresh = 1;
    denoiser->denoise_pars.scale_motion_thresh = 8;
    denoiser->denoise_pars.scale_increase_filter = 0;
    denoiser->denoise_pars.denoise_mv_bias = 95;
    denoiser->denoise_pars.pickmode_mv_bias = 100;
    denoiser->denoise_pars.qp_thresh = 0;
    denoiser->denoise_pars.consec_zerolast = UINT_MAX;
  }
}
399 
/*
 * Allocate and zero every buffer the denoiser needs, then initialise its
 * parameters and mode-switching thresholds.
 *
 * Allocated here: one running-average frame per reference frame
 * (MAX_REF_FRAMES of them), the motion-compensated running-average frame,
 * the last-source frame, and one byte of per-macroblock filter state
 * (num_mb_rows * num_mb_cols entries).
 *
 * width, height         frame dimensions passed to the frame allocator; also
 *                       used to pick resolution-dependent thresholds.
 * num_mb_rows/cols      macroblock grid size.
 * mode                  forwarded to vp8_denoiser_set_parameters().
 *
 * Returns 0 on success. Returns 1 if any allocation fails; in that case
 * vp8_denoiser_free() has already been called on the denoiser.
 */
int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height,
                          int num_mb_rows, int num_mb_cols, int mode)
{
    int i;
    assert(denoiser);
    denoiser->num_mb_cols = num_mb_cols;

    for (i = 0; i < MAX_REF_FRAMES; i++)
    {
        denoiser->yv12_running_avg[i].flags = 0;

        if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width,
                                        height, VP8BORDERINPIXELS)
            < 0)
        {
            vp8_denoiser_free(denoiser);
            return 1;
        }
        memset(denoiser->yv12_running_avg[i].buffer_alloc, 0,
               denoiser->yv12_running_avg[i].frame_size);

    }
    denoiser->yv12_mc_running_avg.flags = 0;

    if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width,
                                   height, VP8BORDERINPIXELS) < 0)
    {
        vp8_denoiser_free(denoiser);
        return 1;
    }

    memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0,
           denoiser->yv12_mc_running_avg.frame_size);

    if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width,
                                    height, VP8BORDERINPIXELS) < 0)
    {
        vp8_denoiser_free(denoiser);
        return 1;
    }
    memset(denoiser->yv12_last_source.buffer_alloc, 0,
           denoiser->yv12_last_source.frame_size);

    denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1);
    /* The state array must exist before it is written: treat a failed
     * allocation like the frame-buffer failures above instead of passing
     * a NULL pointer to memset(). */
    if (!denoiser->denoise_state)
    {
        vp8_denoiser_free(denoiser);
        return 1;
    }
    memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols));
    vp8_denoiser_set_parameters(denoiser, mode);
    denoiser->nmse_source_diff = 0;
    denoiser->nmse_source_diff_count = 0;
    denoiser->qp_avg = 0;
    /* QP threshold below which we can go up to aggressive mode. */
    denoiser->qp_threshold_up = 80;
    /* QP threshold above which we can go back down to normal mode.
     * For now keep this second threshold high, so not used currently. */
    denoiser->qp_threshold_down = 128;
    /* Bitrate thresholds and noise metric (nmse) thresholds for switching to
     * aggressive mode; larger frames get larger thresholds.
     * TODO(marpan): Adjust thresholds, including effect on resolution. */
    denoiser->bitrate_threshold = 400000;  /* (bits/sec). */
    denoiser->threshold_aggressive_mode = 80;
    if (width * height > 1280 * 720)
    {
        denoiser->bitrate_threshold = 3000000;
        denoiser->threshold_aggressive_mode = 200;
    }
    else if (width * height > 960 * 540)
    {
        denoiser->bitrate_threshold = 1200000;
        denoiser->threshold_aggressive_mode = 120;
    }
    else if (width * height > 640 * 480)
    {
        denoiser->bitrate_threshold = 600000;
        denoiser->threshold_aggressive_mode = 100;
    }
    return 0;
}
470 
471 
/*
 * Release everything vp8_denoiser_allocate() set up: the per-reference
 * running-average frames, the motion-compensated running-average frame, the
 * last-source frame and the per-macroblock filter state.
 *
 * The state pointer is cleared after it is freed. vp8_denoiser_allocate()
 * calls this function on its failure paths before it assigns a new state
 * array, so a stale pointer left over from an earlier free would otherwise
 * be freed a second time.
 */
void vp8_denoiser_free(VP8_DENOISER *denoiser)
{
    int i;
    assert(denoiser);

    for (i = 0; i < MAX_REF_FRAMES ; i++)
    {
        vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]);
    }
    vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg);
    vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_last_source);
    vpx_free(denoiser->denoise_state);
    denoiser->denoise_state = NULL;
}
485 
/*
 * Run the temporal denoiser on one macroblock.
 *
 * Steps, in order:
 *  1. If a zero-MV reference frame is available, build the motion-compensated
 *     running average for this macroblock into yv12_mc_running_avg, using
 *     either the best-SSE motion vector or, when motion is small or the
 *     best reference is intra, zero motion. The macroblock's mode info and
 *     the pre/dst buffers are changed only temporarily and restored
 *     afterwards.
 *  2. Decide whether to filter: the block is copied unfiltered when its SSE
 *     or its squared motion magnitude is above the (mode-scaled) thresholds.
 *  3. Filter luma, and - for zero motion, when luma was filtered and the
 *     mode is not Y-only - both chroma planes.
 *  4. For every plane that was not filtered, copy the source block into
 *     yv12_running_avg[INTRA_FRAME].
 *  5. Selectively loop-filter the left/top luma edges of the running average
 *     and, if anything was filtered, copy the result into x->thismb.
 *
 * best_sse / zero_mv_sse   SSE of the best candidate and of the zero-MV
 *                          candidate.
 * recon_yoffset / recon_uvoffset   offsets of this macroblock inside the
 *                          luma / chroma frame buffers.
 * lfi_n                    loop-filter tables used for the edge filtering.
 * mb_row, mb_col           macroblock position; only used to detect the
 *                          first row / column.
 * block_index              index of this macroblock in denoise_state.
 */
void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser,
                             MACROBLOCK *x,
                             unsigned int best_sse,
                             unsigned int zero_mv_sse,
                             int recon_yoffset,
                             int recon_uvoffset,
                             loop_filter_info_n *lfi_n,
                             int mb_row,
                             int mb_col,
                             int block_index)
{
    /* Frame that receives the denoised output. */
    YV12_BUFFER_CONFIG *const avg = &denoiser->yv12_running_avg[INTRA_FRAME];
    /* Frame that receives the motion-compensated running average. */
    YV12_BUFFER_CONFIG *const mc_avg = &denoiser->yv12_mc_running_avg;
    const MV_REFERENCE_FRAME frame = x->best_reference_frame;
    const MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame;
    enum vp8_denoiser_decision decision = FILTER_BLOCK;
    enum vp8_denoiser_decision decision_u = COPY_BLOCK;
    enum vp8_denoiser_decision decision_v = COPY_BLOCK;
    /* Spatial loop filter: only applied selectively based on the temporal
     * filter state of the block relative to its top/left neighbors. */
    const int apply_spatial_loop_filter = 1;
    unsigned int motion_magnitude2;
    unsigned int motion_threshold;
    unsigned int sse_thresh;
    int sse_diff_thresh = 0;
    int mv_row;
    int mv_col;

    if (zero_frame)
    {
        YV12_BUFFER_CONFIG *ref_avg = &denoiser->yv12_running_avg[frame];
        YV12_BUFFER_CONFIG pre_backup, dst_backup;
        MB_MODE_INFO mbmi_backup;
        MACROBLOCKD *const xd = &x->e_mbd;
        MB_MODE_INFO *const mbmi = &xd->mode_info_context->mbmi;
        /* Bias on the zero motion vector SSE. */
        const int zero_bias = denoiser->denoise_pars.denoise_mv_bias;
        int sse_diff;

        zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100);
        sse_diff = zero_mv_sse - best_sse;

        mbmi_backup = *mbmi;

        /* Use the best MV for the compensation. */
        mbmi->ref_frame = x->best_reference_frame;
        mbmi->mode = x->best_sse_inter_mode;
        mbmi->mv = x->best_sse_mv;
        mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs;
        mv_col = x->best_sse_mv.as_mv.col;
        mv_row = x->best_sse_mv.as_mv.row;

        /* Bias towards zero_mv for a small amount of motion. Because
         * sse_diff_thresh starts at zero, zero_mv is always chosen for
         * denoising when zero_mv_sse <= best_sse (i.e., sse_diff <= 0). */
        if ((unsigned int)(mv_row * mv_row + mv_col * mv_col)
            <= NOISE_MOTION_THRESHOLD)
        {
            sse_diff_thresh = (int)SSE_DIFF_THRESHOLD;
        }

        if (frame == INTRA_FRAME || sse_diff <= sse_diff_thresh)
        {
            /*
             * Handle intra blocks as referring to the last frame with zero
             * motion and let the absolute pixel difference affect the
             * filter factor. A small amount of motion is also treated as
             * random walk due to noise, unless that would mean a much
             * bigger error. Changes to the mode info made here only affect
             * the denoising.
             */
            x->denoise_zeromv = 1;
            mbmi->ref_frame = x->best_zeromv_reference_frame;

            ref_avg = &denoiser->yv12_running_avg[zero_frame];

            mbmi->mode = ZEROMV;
            mbmi->mv.as_int = 0;
            x->best_sse_inter_mode = ZEROMV;
            x->best_sse_mv.as_int = 0;
            best_sse = zero_mv_sse;
        }

        pre_backup = xd->pre;
        dst_backup = xd->dst;

        /* Predict from the running average of the chosen reference ... */
        xd->pre.y_buffer = ref_avg->y_buffer + recon_yoffset;
        xd->pre.u_buffer = ref_avg->u_buffer + recon_uvoffset;
        xd->pre.v_buffer = ref_avg->v_buffer + recon_uvoffset;
        /* ... and write the compensated running average to mc_avg. */
        xd->dst.y_buffer = mc_avg->y_buffer + recon_yoffset;
        xd->dst.u_buffer = mc_avg->u_buffer + recon_uvoffset;
        xd->dst.v_buffer = mc_avg->v_buffer + recon_uvoffset;

        if (x->skip)
        {
            vp8_build_inter16x16_predictors_mb(xd,
                                               xd->dst.y_buffer,
                                               xd->dst.u_buffer,
                                               xd->dst.v_buffer,
                                               xd->dst.y_stride,
                                               xd->dst.uv_stride);
        }
        else
        {
            vp8_build_inter_predictors_mb(xd);
        }

        /* Undo every temporary change made for the compensation. */
        xd->pre = pre_backup;
        xd->dst = dst_backup;
        *mbmi = mbmi_backup;
    }

    mv_row = x->best_sse_mv.as_mv.row;
    mv_col = x->best_sse_mv.as_mv.col;
    motion_magnitude2 = mv_row * mv_row + mv_col * mv_col;
    motion_threshold = denoiser->denoise_pars.scale_motion_thresh *
        NOISE_MOTION_THRESHOLD;

    /* If the block is considered to be a skin area, lower the motion
     * threshold. In the current version the threshold is 1, so only very
     * low (i.e., zero) mv is denoised on skin. */
    if (x->is_skin)
        motion_threshold = 1;

    if (motion_magnitude2 <
        denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD)
    {
        x->increase_denoising = 1;
    }

    if (x->increase_denoising)
    {
        sse_thresh =
            denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH;
    }
    else
    {
        sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD;
    }

    if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold)
    {
        decision = COPY_BLOCK;
    }
    else
    {
        /* Filter luma. */
        decision = vp8_denoiser_filter(mc_avg->y_buffer + recon_yoffset,
                                       mc_avg->y_stride,
                                       avg->y_buffer + recon_yoffset,
                                       avg->y_stride,
                                       x->thismb, 16, motion_magnitude2,
                                       x->increase_denoising);
        denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ?
            kFilterNonZeroMV : kFilterZeroMV;

        /* Only denoise UV for zero motion, and if the y channel was
         * denoised. */
        if (denoiser->denoiser_mode != kDenoiserOnYOnly &&
            motion_magnitude2 == 0 &&
            decision == FILTER_BLOCK)
        {
            const int mc_uv_stride = mc_avg->uv_stride;
            const int out_uv_stride = avg->uv_stride;
            const int signal_stride = x->block[16].src_stride;

            decision_u = vp8_denoiser_filter_uv(
                mc_avg->u_buffer + recon_uvoffset, mc_uv_stride,
                avg->u_buffer + recon_uvoffset, out_uv_stride,
                x->block[16].src + *x->block[16].base_src,
                signal_stride, motion_magnitude2, 0);
            decision_v = vp8_denoiser_filter_uv(
                mc_avg->v_buffer + recon_uvoffset, mc_uv_stride,
                avg->v_buffer + recon_uvoffset, out_uv_stride,
                x->block[20].src + *x->block[20].base_src,
                signal_stride, motion_magnitude2, 0);
        }
    }

    if (decision == COPY_BLOCK)
    {
        /* No filtering of this block; it differs too much from the
         * predictor, or the motion vector magnitude is considered too big.
         * Keep the source block as the running average. */
        x->denoise_zeromv = 0;
        vp8_copy_mem16x16(x->thismb, 16,
                          avg->y_buffer + recon_yoffset, avg->y_stride);
        denoiser->denoise_state[block_index] = kNoFilter;
    }

    if (denoiser->denoiser_mode != kDenoiserOnYOnly)
    {
        if (decision_u == COPY_BLOCK)
        {
            vp8_copy_mem8x8(x->block[16].src + *x->block[16].base_src,
                            x->block[16].src_stride,
                            avg->u_buffer + recon_uvoffset, avg->uv_stride);
        }
        if (decision_v == COPY_BLOCK)
        {
            vp8_copy_mem8x8(x->block[20].src + *x->block[20].base_src,
                            x->block[16].src_stride,
                            avg->v_buffer + recon_uvoffset, avg->uv_stride);
        }
    }

    /* Option to selectively deblock the denoised signal, for the y channel
     * only. */
    if (apply_spatial_loop_filter)
    {
        /* Fix the filter level to some nominal value for now. */
        const int filter_level = 48;
        const int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level];
        const int y_stride = avg->y_stride;
        const int uv_stride = avg->uv_stride;
        loop_filter_info lfi;
        int edge_filtered = 0;

        lfi.mblim = lfi_n->mblim[filter_level];
        lfi.blim = lfi_n->blim[filter_level];
        lfi.lim = lfi_n->lim[filter_level];
        lfi.hev_thr = lfi_n->hev_thr[hev_index];

        /* An edge is filtered when the denoiser filter state differs
         * between the current block and its left/top neighbor, or when a
         * non-zero motion vector was used for the motion-compensated
         * filtering. */
        if (mb_col > 0 &&
            (denoiser->denoise_state[block_index] !=
                 denoiser->denoise_state[block_index - 1] ||
             denoiser->denoise_state[block_index] == kFilterNonZeroMV))
        {
            /* Filter the left vertical edge. */
            edge_filtered = 1;
            vp8_loop_filter_mbv(avg->y_buffer + recon_yoffset,
                                NULL, NULL, y_stride, uv_stride, &lfi);
        }
        if (mb_row > 0 &&
            (denoiser->denoise_state[block_index] !=
                 denoiser->denoise_state[block_index - denoiser->num_mb_cols] ||
             denoiser->denoise_state[block_index] == kFilterNonZeroMV))
        {
            /* Filter the top horizontal edge. */
            edge_filtered = 1;
            vp8_loop_filter_mbh(avg->y_buffer + recon_yoffset,
                                NULL, NULL, y_stride, uv_stride, &lfi);
        }
        if (edge_filtered)
        {
            /* Update the signal block |x|. Pixel changes are only to top
             * and/or left boundary pixels: can we avoid a full block copy
             * here? */
            vp8_copy_mem16x16(avg->y_buffer + recon_yoffset, y_stride,
                              x->thismb, 16);
        }
    }
}
746