1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/film_grain.h"
16 
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <new>
23 
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/dsp/film_grain_common.h"
28 #include "src/utils/array_2d.h"
29 #include "src/utils/common.h"
30 #include "src/utils/compiler_attributes.h"
31 #include "src/utils/logging.h"
32 
33 namespace libgav1 {
34 namespace dsp {
35 namespace film_grain {
36 namespace {
37 
38 // Making this a template function prevents it from adding to code size when it
39 // is not placed in the DSP table. Most functions in the dsp directory change
40 // behavior by bitdepth, but because this one doesn't, it receives a dummy
41 // parameter with one enforced value, ensuring only one copy is made.
42 template <int singleton>
InitializeScalingLookupTable_C(int num_points,const uint8_t point_value[],const uint8_t point_scaling[],uint8_t scaling_lut[kScalingLookupTableSize])43 void InitializeScalingLookupTable_C(
44     int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
45     uint8_t scaling_lut[kScalingLookupTableSize]) {
46   static_assert(singleton == 0,
47                 "Improper instantiation of InitializeScalingLookupTable_C. "
48                 "There should be only one copy of this function.");
49   if (num_points == 0) {
50     memset(scaling_lut, 0, sizeof(scaling_lut[0]) * kScalingLookupTableSize);
51     return;
52   }
53   static_assert(sizeof(scaling_lut[0]) == 1, "");
54   memset(scaling_lut, point_scaling[0], point_value[0]);
55   for (int i = 0; i < num_points - 1; ++i) {
56     const int delta_y = point_scaling[i + 1] - point_scaling[i];
57     const int delta_x = point_value[i + 1] - point_value[i];
58     const int delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
59     for (int x = 0; x < delta_x; ++x) {
60       const int v = point_scaling[i] + ((x * delta + 32768) >> 16);
61       assert(v >= 0 && v <= UINT8_MAX);
62       scaling_lut[point_value[i] + x] = v;
63     }
64   }
65   const uint8_t last_point_value = point_value[num_points - 1];
66   memset(&scaling_lut[last_point_value], point_scaling[num_points - 1],
67          kScalingLookupTableSize - last_point_value);
68 }
69 
70 // Section 7.18.3.5.
71 // Performs a piecewise linear interpolation into the scaling table.
72 template <int bitdepth>
ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize],int index)73 int ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize], int index) {
74   const int shift = bitdepth - 8;
75   const int quotient = index >> shift;
76   const int remainder = index - (quotient << shift);
77   if (bitdepth == 8) {
78     assert(quotient < kScalingLookupTableSize);
79     return scaling_lut[quotient];
80   }
81   assert(quotient + 1 < kScalingLookupTableSize);
82   const int start = scaling_lut[quotient];
83   const int end = scaling_lut[quotient + 1];
84   return start + RightShiftWithRounding((end - start) * remainder, shift);
85 }
86 
87 // Applies an auto-regressive filter to the white noise in luma_grain.
88 template <int bitdepth, typename GrainType>
ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams & params,void * luma_grain_buffer)89 void ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams& params,
90                                             void* luma_grain_buffer) {
91   auto* luma_grain = static_cast<GrainType*>(luma_grain_buffer);
92   const int grain_min = GetGrainMin<bitdepth>();
93   const int grain_max = GetGrainMax<bitdepth>();
94   const int auto_regression_coeff_lag = params.auto_regression_coeff_lag;
95   assert(auto_regression_coeff_lag > 0 && auto_regression_coeff_lag <= 3);
96   // A pictorial representation of the auto-regressive filter for various values
97   // of auto_regression_coeff_lag. The letter 'O' represents the current sample.
98   // (The filter always operates on the current sample with filter
99   // coefficient 1.) The letters 'X' represent the neighboring samples that the
100   // filter operates on.
101   //
102   // auto_regression_coeff_lag == 3:
103   //   X X X X X X X
104   //   X X X X X X X
105   //   X X X X X X X
106   //   X X X O
107   // auto_regression_coeff_lag == 2:
108   //     X X X X X
109   //     X X X X X
110   //     X X O
111   // auto_regression_coeff_lag == 1:
112   //       X X X
113   //       X O
114   // auto_regression_coeff_lag == 0:
115   //         O
116   //
117   // Note that if auto_regression_coeff_lag is 0, the filter is the identity
118   // filter and therefore can be skipped. This implementation assumes it is not
119   // called in that case.
120   const int shift = params.auto_regression_shift;
121   for (int y = kAutoRegressionBorder; y < kLumaHeight; ++y) {
122     for (int x = kAutoRegressionBorder; x < kLumaWidth - kAutoRegressionBorder;
123          ++x) {
124       int sum = 0;
125       int pos = 0;
126       int delta_row = -auto_regression_coeff_lag;
127       // The last iteration (delta_row == 0) is shorter and is handled
128       // separately.
129       do {
130         int delta_column = -auto_regression_coeff_lag;
131         do {
132           const int coeff = params.auto_regression_coeff_y[pos];
133           sum += luma_grain[(y + delta_row) * kLumaWidth + (x + delta_column)] *
134                  coeff;
135           ++pos;
136         } while (++delta_column <= auto_regression_coeff_lag);
137       } while (++delta_row < 0);
138       // Last iteration: delta_row == 0.
139       {
140         int delta_column = -auto_regression_coeff_lag;
141         do {
142           const int coeff = params.auto_regression_coeff_y[pos];
143           sum += luma_grain[y * kLumaWidth + (x + delta_column)] * coeff;
144           ++pos;
145         } while (++delta_column < 0);
146       }
147       luma_grain[y * kLumaWidth + x] = Clip3(
148           luma_grain[y * kLumaWidth + x] + RightShiftWithRounding(sum, shift),
149           grain_min, grain_max);
150     }
151   }
152 }
153 
154 template <int bitdepth, typename GrainType, int auto_regression_coeff_lag,
155           bool use_luma>
ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams & params,const void * luma_grain_buffer,int subsampling_x,int subsampling_y,void * u_grain_buffer,void * v_grain_buffer)156 void ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams& params,
157                                                const void* luma_grain_buffer,
158                                                int subsampling_x,
159                                                int subsampling_y,
160                                                void* u_grain_buffer,
161                                                void* v_grain_buffer) {
162   static_assert(
163       auto_regression_coeff_lag >= 0 && auto_regression_coeff_lag <= 3,
164       "Unsupported autoregression lag for chroma.");
165   const auto* luma_grain = static_cast<const GrainType*>(luma_grain_buffer);
166   const int grain_min = GetGrainMin<bitdepth>();
167   const int grain_max = GetGrainMax<bitdepth>();
168   auto* u_grain = static_cast<GrainType*>(u_grain_buffer);
169   auto* v_grain = static_cast<GrainType*>(v_grain_buffer);
170   const int shift = params.auto_regression_shift;
171   const int chroma_height =
172       (subsampling_y == 0) ? kMaxChromaHeight : kMinChromaHeight;
173   const int chroma_width =
174       (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
175   for (int y = kAutoRegressionBorder; y < chroma_height; ++y) {
176     const int luma_y =
177         ((y - kAutoRegressionBorder) << subsampling_y) + kAutoRegressionBorder;
178     for (int x = kAutoRegressionBorder;
179          x < chroma_width - kAutoRegressionBorder; ++x) {
180       int sum_u = 0;
181       int sum_v = 0;
182       int pos = 0;
183       int delta_row = -auto_regression_coeff_lag;
184       do {
185         int delta_column = -auto_regression_coeff_lag;
186         do {
187           if (delta_row == 0 && delta_column == 0) {
188             break;
189           }
190           const int coeff_u = params.auto_regression_coeff_u[pos];
191           const int coeff_v = params.auto_regression_coeff_v[pos];
192           sum_u +=
193               u_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
194               coeff_u;
195           sum_v +=
196               v_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
197               coeff_v;
198           ++pos;
199         } while (++delta_column <= auto_regression_coeff_lag);
200       } while (++delta_row <= 0);
201       if (use_luma) {
202         int luma = 0;
203         const int luma_x = ((x - kAutoRegressionBorder) << subsampling_x) +
204                            kAutoRegressionBorder;
205         int i = 0;
206         do {
207           int j = 0;
208           do {
209             luma += luma_grain[(luma_y + i) * kLumaWidth + (luma_x + j)];
210           } while (++j <= subsampling_x);
211         } while (++i <= subsampling_y);
212         luma = SubsampledValue(luma, subsampling_x + subsampling_y);
213         const int coeff_u = params.auto_regression_coeff_u[pos];
214         const int coeff_v = params.auto_regression_coeff_v[pos];
215         sum_u += luma * coeff_u;
216         sum_v += luma * coeff_v;
217       }
218       u_grain[y * chroma_width + x] = Clip3(
219           u_grain[y * chroma_width + x] + RightShiftWithRounding(sum_u, shift),
220           grain_min, grain_max);
221       v_grain[y * chroma_width + x] = Clip3(
222           v_grain[y * chroma_width + x] + RightShiftWithRounding(sum_v, shift),
223           grain_min, grain_max);
224     }
225   }
226 }
227 
228 // This implementation is for the condition overlap_flag == false.
229 template <int bitdepth, typename GrainType>
ConstructNoiseStripes_C(const void * grain_buffer,int grain_seed,int width,int height,int subsampling_x,int subsampling_y,void * noise_stripes_buffer)230 void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
231                              int width, int height, int subsampling_x,
232                              int subsampling_y, void* noise_stripes_buffer) {
233   auto* noise_stripes =
234       static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
235   const auto* grain = static_cast<const GrainType*>(grain_buffer);
236   const int half_width = DivideBy2(width + 1);
237   const int half_height = DivideBy2(height + 1);
238   assert(half_width > 0);
239   assert(half_height > 0);
240   static_assert(kLumaWidth == kMaxChromaWidth,
241                 "kLumaWidth width should be equal to kMaxChromaWidth");
242   const int grain_width =
243       (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
244   const int plane_width = (width + subsampling_x) >> subsampling_x;
245   constexpr int kNoiseStripeHeight = 34;
246   int luma_num = 0;
247   int y = 0;
248   do {
249     GrainType* const noise_stripe = (*noise_stripes)[luma_num];
250     uint16_t seed = grain_seed;
251     seed ^= ((luma_num * 37 + 178) & 255) << 8;
252     seed ^= ((luma_num * 173 + 105) & 255);
253     int x = 0;
254     do {
255       const int rand = GetFilmGrainRandomNumber(8, &seed);
256       const int offset_x = rand >> 4;
257       const int offset_y = rand & 15;
258       const int plane_offset_x =
259           (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
260       const int plane_offset_y =
261           (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
262       int i = 0;
263       do {
264         // Section 7.18.3.5 says:
265         //   noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
266         //   wide (a few additional samples across are actually written to
267         //   the array, but these are never read) ...
268         //
269         // Note: The warning in the parentheses also applies to
270         // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
271         //
272         // Writes beyond the width of each row could happen below. To
273         // prevent those writes, we clip the number of pixels to copy against
274         // the remaining width.
275         // TODO(petersonab): Allocate aligned stripes with extra width to cover
276         // the size of the final stripe block, then remove this call to min.
277         const int copy_size =
278             std::min(kNoiseStripeHeight >> subsampling_x,
279                      plane_width - (x << (1 - subsampling_x)));
280         memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x))],
281                &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
282                copy_size * sizeof(noise_stripe[0]));
283       } while (++i < (kNoiseStripeHeight >> subsampling_y));
284       x += 16;
285     } while (x < half_width);
286 
287     ++luma_num;
288     y += 16;
289   } while (y < half_height);
290 }
291 
292 // This implementation is for the condition overlap_flag == true.
293 template <int bitdepth, typename GrainType>
ConstructNoiseStripesWithOverlap_C(const void * grain_buffer,int grain_seed,int width,int height,int subsampling_x,int subsampling_y,void * noise_stripes_buffer)294 void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
295                                         int grain_seed, int width, int height,
296                                         int subsampling_x, int subsampling_y,
297                                         void* noise_stripes_buffer) {
298   auto* noise_stripes =
299       static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
300   const auto* grain = static_cast<const GrainType*>(grain_buffer);
301   const int half_width = DivideBy2(width + 1);
302   const int half_height = DivideBy2(height + 1);
303   assert(half_width > 0);
304   assert(half_height > 0);
305   static_assert(kLumaWidth == kMaxChromaWidth,
306                 "kLumaWidth width should be equal to kMaxChromaWidth");
307   const int grain_width =
308       (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
309   const int plane_width = (width + subsampling_x) >> subsampling_x;
310   constexpr int kNoiseStripeHeight = 34;
311   int luma_num = 0;
312   int y = 0;
313   do {
314     GrainType* const noise_stripe = (*noise_stripes)[luma_num];
315     uint16_t seed = grain_seed;
316     seed ^= ((luma_num * 37 + 178) & 255) << 8;
317     seed ^= ((luma_num * 173 + 105) & 255);
318     // Begin special iteration for x == 0.
319     const int rand = GetFilmGrainRandomNumber(8, &seed);
320     const int offset_x = rand >> 4;
321     const int offset_y = rand & 15;
322     const int plane_offset_x =
323         (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
324     const int plane_offset_y =
325         (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
326     // The overlap computation only occurs when x > 0, so it is omitted here.
327     int i = 0;
328     do {
329       // TODO(petersonab): Allocate aligned stripes with extra width to cover
330       // the size of the final stripe block, then remove this call to min.
331       const int copy_size =
332           std::min(kNoiseStripeHeight >> subsampling_x, plane_width);
333       memcpy(&noise_stripe[i * plane_width],
334              &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
335              copy_size * sizeof(noise_stripe[0]));
336     } while (++i < (kNoiseStripeHeight >> subsampling_y));
337     // End special iteration for x == 0.
338     for (int x = 16; x < half_width; x += 16) {
339       const int rand = GetFilmGrainRandomNumber(8, &seed);
340       const int offset_x = rand >> 4;
341       const int offset_y = rand & 15;
342       const int plane_offset_x =
343           (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
344       const int plane_offset_y =
345           (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
346       int i = 0;
347       do {
348         int j = 0;
349         int grain_sample =
350             grain[(plane_offset_y + i) * grain_width + plane_offset_x];
351         // The first pixel(s) of each segment of the noise_stripe are subject to
352         // the "overlap" computation.
353         if (subsampling_x == 0) {
354           // Corresponds to the line in the spec:
355           // if (j < 2 && x > 0)
356           // j = 0
357           int old = noise_stripe[i * plane_width + x * 2];
358           grain_sample = old * 27 + grain_sample * 17;
359           grain_sample =
360               Clip3(RightShiftWithRounding(grain_sample, 5),
361                     GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
362           noise_stripe[i * plane_width + x * 2] = grain_sample;
363 
364           // This check prevents overwriting for the iteration j = 1. The
365           // continue applies to the i-loop.
366           if (x * 2 + 1 >= plane_width) continue;
367           // j = 1
368           grain_sample =
369               grain[(plane_offset_y + i) * grain_width + plane_offset_x + 1];
370           old = noise_stripe[i * plane_width + x * 2 + 1];
371           grain_sample = old * 17 + grain_sample * 27;
372           grain_sample =
373               Clip3(RightShiftWithRounding(grain_sample, 5),
374                     GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
375           noise_stripe[i * plane_width + x * 2 + 1] = grain_sample;
376           j = 2;
377         } else {
378           // Corresponds to the line in the spec:
379           // if (j == 0 && x > 0)
380           const int old = noise_stripe[i * plane_width + x];
381           grain_sample = old * 23 + grain_sample * 22;
382           grain_sample =
383               Clip3(RightShiftWithRounding(grain_sample, 5),
384                     GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
385           noise_stripe[i * plane_width + x] = grain_sample;
386           j = 1;
387         }
388         // The following covers the rest of the loop over j as described in the
389         // spec.
390         //
391         // Section 7.18.3.5 says:
392         //   noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
393         //   wide (a few additional samples across are actually written to
394         //   the array, but these are never read) ...
395         //
396         // Note: The warning in the parentheses also applies to
397         // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
398         //
399         // Writes beyond the width of each row could happen below. To
400         // prevent those writes, we clip the number of pixels to copy against
401         // the remaining width.
402         // TODO(petersonab): Allocate aligned stripes with extra width to cover
403         // the size of the final stripe block, then remove this call to min.
404         const int copy_size =
405             std::min(kNoiseStripeHeight >> subsampling_x,
406                      plane_width - (x << (1 - subsampling_x))) -
407             j;
408         memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x)) + j],
409                &grain[(plane_offset_y + i) * grain_width + plane_offset_x + j],
410                copy_size * sizeof(noise_stripe[0]));
411       } while (++i < (kNoiseStripeHeight >> subsampling_y));
412     }
413 
414     ++luma_num;
415     y += 16;
416   } while (y < half_height);
417 }
418 
419 template <int bitdepth, typename GrainType>
WriteOverlapLine_C(const GrainType * noise_stripe_row,const GrainType * noise_stripe_row_prev,int plane_width,int grain_coeff,int old_coeff,GrainType * noise_image_row)420 inline void WriteOverlapLine_C(const GrainType* noise_stripe_row,
421                                const GrainType* noise_stripe_row_prev,
422                                int plane_width, int grain_coeff, int old_coeff,
423                                GrainType* noise_image_row) {
424   int x = 0;
425   do {
426     int grain = noise_stripe_row[x];
427     const int old = noise_stripe_row_prev[x];
428     grain = old * old_coeff + grain * grain_coeff;
429     grain = Clip3(RightShiftWithRounding(grain, 5), GetGrainMin<bitdepth>(),
430                   GetGrainMax<bitdepth>());
431     noise_image_row[x] = grain;
432   } while (++x < plane_width);
433 }
434 
435 template <int bitdepth, typename GrainType>
ConstructNoiseImageOverlap_C(const void * noise_stripes_buffer,int width,int height,int subsampling_x,int subsampling_y,void * noise_image_buffer)436 void ConstructNoiseImageOverlap_C(const void* noise_stripes_buffer, int width,
437                                   int height, int subsampling_x,
438                                   int subsampling_y, void* noise_image_buffer) {
439   const auto* noise_stripes =
440       static_cast<const Array2DView<GrainType>*>(noise_stripes_buffer);
441   auto* noise_image = static_cast<Array2D<GrainType>*>(noise_image_buffer);
442   const int plane_width = (width + subsampling_x) >> subsampling_x;
443   const int plane_height = (height + subsampling_y) >> subsampling_y;
444   const int stripe_height = 32 >> subsampling_y;
445   const int stripe_mask = stripe_height - 1;
446   int y = stripe_height;
447   int luma_num = 1;
448   if (subsampling_y == 0) {
449     // Begin complete stripes section. This is when we are guaranteed to have
450     // two overlap rows in each stripe.
451     for (; y < (plane_height & ~stripe_mask); ++luma_num, y += stripe_height) {
452       const GrainType* noise_stripe = (*noise_stripes)[luma_num];
453       const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
454       // First overlap row.
455       WriteOverlapLine_C<bitdepth>(noise_stripe,
456                                    &noise_stripe_prev[32 * plane_width],
457                                    plane_width, 17, 27, (*noise_image)[y]);
458       // Second overlap row.
459       WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
460                                    &noise_stripe_prev[(32 + 1) * plane_width],
461                                    plane_width, 27, 17, (*noise_image)[y + 1]);
462     }
463     // End complete stripes section.
464 
465     const int remaining_height = plane_height - y;
466     // Either one partial stripe remains (remaining_height  > 0),
467     // OR image is less than one stripe high (remaining_height < 0),
468     // OR all stripes are completed (remaining_height == 0).
469     if (remaining_height <= 0) {
470       return;
471     }
472     const GrainType* noise_stripe = (*noise_stripes)[luma_num];
473     const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
474     WriteOverlapLine_C<bitdepth>(noise_stripe,
475                                  &noise_stripe_prev[32 * plane_width],
476                                  plane_width, 17, 27, (*noise_image)[y]);
477 
478     // Check if second overlap row is in the image.
479     if (remaining_height > 1) {
480       WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
481                                    &noise_stripe_prev[(32 + 1) * plane_width],
482                                    plane_width, 27, 17, (*noise_image)[y + 1]);
483     }
484   } else {  // |subsampling_y| == 1
485     // No special checks needed for partial stripes, because if one exists, the
486     // first and only overlap row is guaranteed to exist.
487     for (; y < plane_height; ++luma_num, y += stripe_height) {
488       const GrainType* noise_stripe = (*noise_stripes)[luma_num];
489       const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
490       WriteOverlapLine_C<bitdepth>(noise_stripe,
491                                    &noise_stripe_prev[16 * plane_width],
492                                    plane_width, 22, 23, (*noise_image)[y]);
493     }
494   }
495 }
496 
497 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageLuma_C(const void * noise_image_ptr,int min_value,int max_luma,int scaling_shift,int width,int height,int start_height,const uint8_t scaling_lut_y[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,void * dest_plane_y,ptrdiff_t dest_stride_y)498 void BlendNoiseWithImageLuma_C(
499     const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
500     int width, int height, int start_height,
501     const uint8_t scaling_lut_y[kScalingLookupTableSize],
502     const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
503     ptrdiff_t dest_stride_y) {
504   const auto* noise_image =
505       static_cast<const Array2D<GrainType>*>(noise_image_ptr);
506   const auto* in_y = static_cast<const Pixel*>(source_plane_y);
507   source_stride_y /= sizeof(Pixel);
508   auto* out_y = static_cast<Pixel*>(dest_plane_y);
509   dest_stride_y /= sizeof(Pixel);
510 
511   int y = 0;
512   do {
513     int x = 0;
514     do {
515       const int orig = in_y[y * source_stride_y + x];
516       int noise = noise_image[kPlaneY][y + start_height][x];
517       noise = RightShiftWithRounding(
518           ScaleLut<bitdepth>(scaling_lut_y, orig) * noise, scaling_shift);
519       out_y[y * dest_stride_y + x] = Clip3(orig + noise, min_value, max_luma);
520     } while (++x < width);
521   } while (++y < height);
522 }
523 
524 // This function is for the case params_.chroma_scaling_from_luma == false.
525 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageChroma_C(Plane plane,const FilmGrainParams & params,const void * noise_image_ptr,int min_value,int max_chroma,int width,int height,int start_height,int subsampling_x,int subsampling_y,const uint8_t scaling_lut_uv[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,const void * source_plane_uv,ptrdiff_t source_stride_uv,void * dest_plane_uv,ptrdiff_t dest_stride_uv)526 void BlendNoiseWithImageChroma_C(
527     Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
528     int min_value, int max_chroma, int width, int height, int start_height,
529     int subsampling_x, int subsampling_y,
530     const uint8_t scaling_lut_uv[kScalingLookupTableSize],
531     const void* source_plane_y, ptrdiff_t source_stride_y,
532     const void* source_plane_uv, ptrdiff_t source_stride_uv,
533     void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
534   const auto* noise_image =
535       static_cast<const Array2D<GrainType>*>(noise_image_ptr);
536 
537   const int chroma_width = (width + subsampling_x) >> subsampling_x;
538   const int chroma_height = (height + subsampling_y) >> subsampling_y;
539 
540   const auto* in_y = static_cast<const Pixel*>(source_plane_y);
541   source_stride_y /= sizeof(Pixel);
542   const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
543   source_stride_uv /= sizeof(Pixel);
544   auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
545   dest_stride_uv /= sizeof(Pixel);
546 
547   const int offset = (plane == kPlaneU) ? params.u_offset : params.v_offset;
548   const int luma_multiplier =
549       (plane == kPlaneU) ? params.u_luma_multiplier : params.v_luma_multiplier;
550   const int multiplier =
551       (plane == kPlaneU) ? params.u_multiplier : params.v_multiplier;
552 
553   const int scaling_shift = params.chroma_scaling;
554   start_height >>= subsampling_y;
555   int y = 0;
556   do {
557     int x = 0;
558     do {
559       const int luma_x = x << subsampling_x;
560       const int luma_y = y << subsampling_y;
561       const int luma_next_x = std::min(luma_x + 1, width - 1);
562       int average_luma;
563       if (subsampling_x != 0) {
564         average_luma = RightShiftWithRounding(
565             in_y[luma_y * source_stride_y + luma_x] +
566                 in_y[luma_y * source_stride_y + luma_next_x],
567             1);
568       } else {
569         average_luma = in_y[luma_y * source_stride_y + luma_x];
570       }
571       const int orig = in_uv[y * source_stride_uv + x];
572       const int combined = average_luma * luma_multiplier + orig * multiplier;
573       const int merged =
574           Clip3((combined >> 6) + LeftShift(offset, bitdepth - 8), 0,
575                 (1 << bitdepth) - 1);
576       int noise = noise_image[plane][y + start_height][x];
577       noise = RightShiftWithRounding(
578           ScaleLut<bitdepth>(scaling_lut_uv, merged) * noise, scaling_shift);
579       out_uv[y * dest_stride_uv + x] =
580           Clip3(orig + noise, min_value, max_chroma);
581     } while (++x < chroma_width);
582   } while (++y < chroma_height);
583 }
584 
585 // This function is for the case params_.chroma_scaling_from_luma == true.
586 // This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
587 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageChromaWithCfl_C(Plane plane,const FilmGrainParams & params,const void * noise_image_ptr,int min_value,int max_chroma,int width,int height,int start_height,int subsampling_x,int subsampling_y,const uint8_t scaling_lut[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,const void * source_plane_uv,ptrdiff_t source_stride_uv,void * dest_plane_uv,ptrdiff_t dest_stride_uv)588 void BlendNoiseWithImageChromaWithCfl_C(
589     Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
590     int min_value, int max_chroma, int width, int height, int start_height,
591     int subsampling_x, int subsampling_y,
592     const uint8_t scaling_lut[kScalingLookupTableSize],
593     const void* source_plane_y, ptrdiff_t source_stride_y,
594     const void* source_plane_uv, ptrdiff_t source_stride_uv,
595     void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
596   const auto* noise_image =
597       static_cast<const Array2D<GrainType>*>(noise_image_ptr);
598   const auto* in_y = static_cast<const Pixel*>(source_plane_y);
599   source_stride_y /= sizeof(Pixel);
600   const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
601   source_stride_uv /= sizeof(Pixel);
602   auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
603   dest_stride_uv /= sizeof(Pixel);
604 
605   const int chroma_width = (width + subsampling_x) >> subsampling_x;
606   const int chroma_height = (height + subsampling_y) >> subsampling_y;
607   const int scaling_shift = params.chroma_scaling;
608   start_height >>= subsampling_y;
609   int y = 0;
610   do {
611     int x = 0;
612     do {
613       const int luma_x = x << subsampling_x;
614       const int luma_y = y << subsampling_y;
615       const int luma_next_x = std::min(luma_x + 1, width - 1);
616       int average_luma;
617       if (subsampling_x != 0) {
618         average_luma = RightShiftWithRounding(
619             in_y[luma_y * source_stride_y + luma_x] +
620                 in_y[luma_y * source_stride_y + luma_next_x],
621             1);
622       } else {
623         average_luma = in_y[luma_y * source_stride_y + luma_x];
624       }
625       const int orig_uv = in_uv[y * source_stride_uv + x];
626       int noise_uv = noise_image[plane][y + start_height][x];
627       noise_uv = RightShiftWithRounding(
628           ScaleLut<bitdepth>(scaling_lut, average_luma) * noise_uv,
629           scaling_shift);
630       out_uv[y * dest_stride_uv + x] =
631           Clip3(orig_uv + noise_uv, min_value, max_chroma);
632     } while (++x < chroma_width);
633   } while (++y < chroma_height);
634 }
635 
Init8bpp()636 void Init8bpp() {
637   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
638   assert(dsp != nullptr);
639 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
640   // LumaAutoRegressionFunc
641   dsp->film_grain.luma_auto_regression[0] =
642       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
643   dsp->film_grain.luma_auto_regression[1] =
644       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
645   dsp->film_grain.luma_auto_regression[2] =
646       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
647 
648   // ChromaAutoRegressionFunc
649   // Chroma autoregression should never be called when lag is 0 and use_luma is
650   // false.
651   dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
652   dsp->film_grain.chroma_auto_regression[0][1] =
653       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
654   dsp->film_grain.chroma_auto_regression[0][2] =
655       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
656   dsp->film_grain.chroma_auto_regression[0][3] =
657       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
658   dsp->film_grain.chroma_auto_regression[1][0] =
659       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
660   dsp->film_grain.chroma_auto_regression[1][1] =
661       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
662   dsp->film_grain.chroma_auto_regression[1][2] =
663       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
664   dsp->film_grain.chroma_auto_regression[1][3] =
665       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
666 
667   // ConstructNoiseStripesFunc
668   dsp->film_grain.construct_noise_stripes[0] =
669       ConstructNoiseStripes_C<8, int8_t>;
670   dsp->film_grain.construct_noise_stripes[1] =
671       ConstructNoiseStripesWithOverlap_C<8, int8_t>;
672 
673   // ConstructNoiseImageOverlapFunc
674   dsp->film_grain.construct_noise_image_overlap =
675       ConstructNoiseImageOverlap_C<8, int8_t>;
676 
677   // InitializeScalingLutFunc
678   dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
679 
680   // BlendNoiseWithImageLumaFunc
681   dsp->film_grain.blend_noise_luma =
682       BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
683 
684   // BlendNoiseWithImageChromaFunc
685   dsp->film_grain.blend_noise_chroma[0] =
686       BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
687   dsp->film_grain.blend_noise_chroma[1] =
688       BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
689 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
690   static_cast<void>(dsp);
691 #ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionLuma
692   dsp->film_grain.luma_auto_regression[0] =
693       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
694   dsp->film_grain.luma_auto_regression[1] =
695       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
696   dsp->film_grain.luma_auto_regression[2] =
697       ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
698 #endif
699 #ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionChroma
700   // Chroma autoregression should never be called when lag is 0 and use_luma is
701   // false.
702   dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
703   dsp->film_grain.chroma_auto_regression[0][1] =
704       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
705   dsp->film_grain.chroma_auto_regression[0][2] =
706       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
707   dsp->film_grain.chroma_auto_regression[0][3] =
708       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
709   dsp->film_grain.chroma_auto_regression[1][0] =
710       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
711   dsp->film_grain.chroma_auto_regression[1][1] =
712       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
713   dsp->film_grain.chroma_auto_regression[1][2] =
714       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
715   dsp->film_grain.chroma_auto_regression[1][3] =
716       ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
717 #endif
718 #ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseStripes
719   dsp->film_grain.construct_noise_stripes[0] =
720       ConstructNoiseStripes_C<8, int8_t>;
721   dsp->film_grain.construct_noise_stripes[1] =
722       ConstructNoiseStripesWithOverlap_C<8, int8_t>;
723 #endif
724 #ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseImageOverlap
725   dsp->film_grain.construct_noise_image_overlap =
726       ConstructNoiseImageOverlap_C<8, int8_t>;
727 #endif
728 #ifndef LIBGAV1_Dsp8bpp_FilmGrainInitializeScalingLutFunc
729   dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
730 #endif
731 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseLuma
732   dsp->film_grain.blend_noise_luma =
733       BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
734 #endif
735 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChroma
736   dsp->film_grain.blend_noise_chroma[0] =
737       BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
738 #endif
739 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChromaWithCfl
740   dsp->film_grain.blend_noise_chroma[1] =
741       BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
742 #endif
743 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
744 }
745 
746 #if LIBGAV1_MAX_BITDEPTH >= 10
Init10bpp()747 void Init10bpp() {
748   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
749   assert(dsp != nullptr);
750 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
751 
752   // LumaAutoRegressionFunc
753   dsp->film_grain.luma_auto_regression[0] =
754       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
755   dsp->film_grain.luma_auto_regression[1] =
756       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
757   dsp->film_grain.luma_auto_regression[2] =
758       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
759 
760   // ChromaAutoRegressionFunc
761   // Chroma autoregression should never be called when lag is 0 and use_luma is
762   // false.
763   dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
764   dsp->film_grain.chroma_auto_regression[0][1] =
765       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
766   dsp->film_grain.chroma_auto_regression[0][2] =
767       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
768   dsp->film_grain.chroma_auto_regression[0][3] =
769       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
770   dsp->film_grain.chroma_auto_regression[1][0] =
771       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
772   dsp->film_grain.chroma_auto_regression[1][1] =
773       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
774   dsp->film_grain.chroma_auto_regression[1][2] =
775       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
776   dsp->film_grain.chroma_auto_regression[1][3] =
777       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
778 
779   // ConstructNoiseStripesFunc
780   dsp->film_grain.construct_noise_stripes[0] =
781       ConstructNoiseStripes_C<10, int16_t>;
782   dsp->film_grain.construct_noise_stripes[1] =
783       ConstructNoiseStripesWithOverlap_C<10, int16_t>;
784 
785   // ConstructNoiseImageOverlapFunc
786   dsp->film_grain.construct_noise_image_overlap =
787       ConstructNoiseImageOverlap_C<10, int16_t>;
788 
789   // InitializeScalingLutFunc
790   dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
791 
792   // BlendNoiseWithImageLumaFunc
793   dsp->film_grain.blend_noise_luma =
794       BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
795 
796   // BlendNoiseWithImageChromaFunc
797   dsp->film_grain.blend_noise_chroma[0] =
798       BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
799   dsp->film_grain.blend_noise_chroma[1] =
800       BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
801 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
802   static_cast<void>(dsp);
803 #ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionLuma
804   dsp->film_grain.luma_auto_regression[0] =
805       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
806   dsp->film_grain.luma_auto_regression[1] =
807       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
808   dsp->film_grain.luma_auto_regression[2] =
809       ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
810 #endif
811 #ifndef LIBGAV1_Dsp10bpp_FilmGrainAutoregressionChroma
812   // Chroma autoregression should never be called when lag is 0 and use_luma is
813   // false.
814   dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
815   dsp->film_grain.chroma_auto_regression[0][1] =
816       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
817   dsp->film_grain.chroma_auto_regression[0][2] =
818       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
819   dsp->film_grain.chroma_auto_regression[0][3] =
820       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
821   dsp->film_grain.chroma_auto_regression[1][0] =
822       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
823   dsp->film_grain.chroma_auto_regression[1][1] =
824       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
825   dsp->film_grain.chroma_auto_regression[1][2] =
826       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
827   dsp->film_grain.chroma_auto_regression[1][3] =
828       ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
829 #endif
830 #ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseStripes
831   dsp->film_grain.construct_noise_stripes[0] =
832       ConstructNoiseStripes_C<10, int16_t>;
833   dsp->film_grain.construct_noise_stripes[1] =
834       ConstructNoiseStripesWithOverlap_C<10, int16_t>;
835 #endif
836 #ifndef LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseImageOverlap
837   dsp->film_grain.construct_noise_image_overlap =
838       ConstructNoiseImageOverlap_C<10, int16_t>;
839 #endif
840 #ifndef LIBGAV1_Dsp10bpp_FilmGrainInitializeScalingLutFunc
841   dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
842 #endif
843 #ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma
844   dsp->film_grain.blend_noise_luma =
845       BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
846 #endif
847 #ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChroma
848   dsp->film_grain.blend_noise_chroma[0] =
849       BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
850 #endif
851 #ifndef LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChromaWithCfl
852   dsp->film_grain.blend_noise_chroma[1] =
853       BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
854 #endif
855 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
856 }
857 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
858 
859 }  // namespace
860 }  // namespace film_grain
861 
// Entry point that installs the C film grain functions into the DSP tables.
// The 8bpp table is always initialized; the 10bpp table is initialized only
// when the build sets LIBGAV1_MAX_BITDEPTH to 10 or higher.
void FilmGrainInit_C() {
  film_grain::Init8bpp();
#if LIBGAV1_MAX_BITDEPTH >= 10
  // Init10bpp() is itself only compiled under this same condition.
  film_grain::Init10bpp();
#endif
}
868 
869 }  // namespace dsp
870 }  // namespace libgav1
871