1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "src/dsp/film_grain.h"
16
17 #include <algorithm>
18 #include <cassert>
19 #include <cstddef>
20 #include <cstdint>
21 #include <cstring>
22 #include <new>
23
24 #include "src/dsp/common.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/dsp/film_grain_common.h"
28 #include "src/utils/array_2d.h"
29 #include "src/utils/common.h"
30 #include "src/utils/compiler_attributes.h"
31 #include "src/utils/logging.h"
32
33 namespace libgav1 {
34 namespace dsp {
35 namespace film_grain {
36 namespace {
37
38 // Making this a template function prevents it from adding to code size when it
39 // is not placed in the DSP table. Most functions in the dsp directory change
40 // behavior by bitdepth, but because this one doesn't, it receives a dummy
41 // parameter with one enforced value, ensuring only one copy is made.
42 template <int singleton>
InitializeScalingLookupTable_C(int num_points,const uint8_t point_value[],const uint8_t point_scaling[],uint8_t scaling_lut[kScalingLookupTableSize])43 void InitializeScalingLookupTable_C(
44 int num_points, const uint8_t point_value[], const uint8_t point_scaling[],
45 uint8_t scaling_lut[kScalingLookupTableSize]) {
46 static_assert(singleton == 0,
47 "Improper instantiation of InitializeScalingLookupTable_C. "
48 "There should be only one copy of this function.");
49 if (num_points == 0) {
50 memset(scaling_lut, 0, sizeof(scaling_lut[0]) * kScalingLookupTableSize);
51 return;
52 }
53 static_assert(sizeof(scaling_lut[0]) == 1, "");
54 memset(scaling_lut, point_scaling[0], point_value[0]);
55 for (int i = 0; i < num_points - 1; ++i) {
56 const int delta_y = point_scaling[i + 1] - point_scaling[i];
57 const int delta_x = point_value[i + 1] - point_value[i];
58 const int delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
59 for (int x = 0; x < delta_x; ++x) {
60 const int v = point_scaling[i] + ((x * delta + 32768) >> 16);
61 assert(v >= 0 && v <= UINT8_MAX);
62 scaling_lut[point_value[i] + x] = v;
63 }
64 }
65 const uint8_t last_point_value = point_value[num_points - 1];
66 memset(&scaling_lut[last_point_value], point_scaling[num_points - 1],
67 kScalingLookupTableSize - last_point_value);
68 }
69
70 // Section 7.18.3.5.
71 // Performs a piecewise linear interpolation into the scaling table.
72 template <int bitdepth>
ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize],int index)73 int ScaleLut(const uint8_t scaling_lut[kScalingLookupTableSize], int index) {
74 const int shift = bitdepth - 8;
75 const int quotient = index >> shift;
76 const int remainder = index - (quotient << shift);
77 if (bitdepth == 8) {
78 assert(quotient < kScalingLookupTableSize);
79 return scaling_lut[quotient];
80 }
81 assert(quotient + 1 < kScalingLookupTableSize);
82 const int start = scaling_lut[quotient];
83 const int end = scaling_lut[quotient + 1];
84 return start + RightShiftWithRounding((end - start) * remainder, shift);
85 }
86
87 // Applies an auto-regressive filter to the white noise in luma_grain.
88 template <int bitdepth, typename GrainType>
ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams & params,void * luma_grain_buffer)89 void ApplyAutoRegressiveFilterToLumaGrain_C(const FilmGrainParams& params,
90 void* luma_grain_buffer) {
91 auto* luma_grain = static_cast<GrainType*>(luma_grain_buffer);
92 const int grain_min = GetGrainMin<bitdepth>();
93 const int grain_max = GetGrainMax<bitdepth>();
94 const int auto_regression_coeff_lag = params.auto_regression_coeff_lag;
95 assert(auto_regression_coeff_lag > 0 && auto_regression_coeff_lag <= 3);
96 // A pictorial representation of the auto-regressive filter for various values
97 // of auto_regression_coeff_lag. The letter 'O' represents the current sample.
98 // (The filter always operates on the current sample with filter
99 // coefficient 1.) The letters 'X' represent the neighboring samples that the
100 // filter operates on.
101 //
102 // auto_regression_coeff_lag == 3:
103 // X X X X X X X
104 // X X X X X X X
105 // X X X X X X X
106 // X X X O
107 // auto_regression_coeff_lag == 2:
108 // X X X X X
109 // X X X X X
110 // X X O
111 // auto_regression_coeff_lag == 1:
112 // X X X
113 // X O
114 // auto_regression_coeff_lag == 0:
115 // O
116 //
117 // Note that if auto_regression_coeff_lag is 0, the filter is the identity
118 // filter and therefore can be skipped. This implementation assumes it is not
119 // called in that case.
120 const int shift = params.auto_regression_shift;
121 for (int y = kAutoRegressionBorder; y < kLumaHeight; ++y) {
122 for (int x = kAutoRegressionBorder; x < kLumaWidth - kAutoRegressionBorder;
123 ++x) {
124 int sum = 0;
125 int pos = 0;
126 int delta_row = -auto_regression_coeff_lag;
127 // The last iteration (delta_row == 0) is shorter and is handled
128 // separately.
129 do {
130 int delta_column = -auto_regression_coeff_lag;
131 do {
132 const int coeff = params.auto_regression_coeff_y[pos];
133 sum += luma_grain[(y + delta_row) * kLumaWidth + (x + delta_column)] *
134 coeff;
135 ++pos;
136 } while (++delta_column <= auto_regression_coeff_lag);
137 } while (++delta_row < 0);
138 // Last iteration: delta_row == 0.
139 {
140 int delta_column = -auto_regression_coeff_lag;
141 do {
142 const int coeff = params.auto_regression_coeff_y[pos];
143 sum += luma_grain[y * kLumaWidth + (x + delta_column)] * coeff;
144 ++pos;
145 } while (++delta_column < 0);
146 }
147 luma_grain[y * kLumaWidth + x] = Clip3(
148 luma_grain[y * kLumaWidth + x] + RightShiftWithRounding(sum, shift),
149 grain_min, grain_max);
150 }
151 }
152 }
153
154 template <int bitdepth, typename GrainType, int auto_regression_coeff_lag,
155 bool use_luma>
ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams & params,const void * luma_grain_buffer,int subsampling_x,int subsampling_y,void * u_grain_buffer,void * v_grain_buffer)156 void ApplyAutoRegressiveFilterToChromaGrains_C(const FilmGrainParams& params,
157 const void* luma_grain_buffer,
158 int subsampling_x,
159 int subsampling_y,
160 void* u_grain_buffer,
161 void* v_grain_buffer) {
162 static_assert(
163 auto_regression_coeff_lag >= 0 && auto_regression_coeff_lag <= 3,
164 "Unsupported autoregression lag for chroma.");
165 const auto* luma_grain = static_cast<const GrainType*>(luma_grain_buffer);
166 const int grain_min = GetGrainMin<bitdepth>();
167 const int grain_max = GetGrainMax<bitdepth>();
168 auto* u_grain = static_cast<GrainType*>(u_grain_buffer);
169 auto* v_grain = static_cast<GrainType*>(v_grain_buffer);
170 const int shift = params.auto_regression_shift;
171 const int chroma_height =
172 (subsampling_y == 0) ? kMaxChromaHeight : kMinChromaHeight;
173 const int chroma_width =
174 (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
175 for (int y = kAutoRegressionBorder; y < chroma_height; ++y) {
176 const int luma_y =
177 ((y - kAutoRegressionBorder) << subsampling_y) + kAutoRegressionBorder;
178 for (int x = kAutoRegressionBorder;
179 x < chroma_width - kAutoRegressionBorder; ++x) {
180 int sum_u = 0;
181 int sum_v = 0;
182 int pos = 0;
183 int delta_row = -auto_regression_coeff_lag;
184 do {
185 int delta_column = -auto_regression_coeff_lag;
186 do {
187 if (delta_row == 0 && delta_column == 0) {
188 break;
189 }
190 const int coeff_u = params.auto_regression_coeff_u[pos];
191 const int coeff_v = params.auto_regression_coeff_v[pos];
192 sum_u +=
193 u_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
194 coeff_u;
195 sum_v +=
196 v_grain[(y + delta_row) * chroma_width + (x + delta_column)] *
197 coeff_v;
198 ++pos;
199 } while (++delta_column <= auto_regression_coeff_lag);
200 } while (++delta_row <= 0);
201 if (use_luma) {
202 int luma = 0;
203 const int luma_x = ((x - kAutoRegressionBorder) << subsampling_x) +
204 kAutoRegressionBorder;
205 int i = 0;
206 do {
207 int j = 0;
208 do {
209 luma += luma_grain[(luma_y + i) * kLumaWidth + (luma_x + j)];
210 } while (++j <= subsampling_x);
211 } while (++i <= subsampling_y);
212 luma = SubsampledValue(luma, subsampling_x + subsampling_y);
213 const int coeff_u = params.auto_regression_coeff_u[pos];
214 const int coeff_v = params.auto_regression_coeff_v[pos];
215 sum_u += luma * coeff_u;
216 sum_v += luma * coeff_v;
217 }
218 u_grain[y * chroma_width + x] = Clip3(
219 u_grain[y * chroma_width + x] + RightShiftWithRounding(sum_u, shift),
220 grain_min, grain_max);
221 v_grain[y * chroma_width + x] = Clip3(
222 v_grain[y * chroma_width + x] + RightShiftWithRounding(sum_v, shift),
223 grain_min, grain_max);
224 }
225 }
226 }
227
228 // This implementation is for the condition overlap_flag == false.
229 template <int bitdepth, typename GrainType>
ConstructNoiseStripes_C(const void * grain_buffer,int grain_seed,int width,int height,int subsampling_x,int subsampling_y,void * noise_stripes_buffer)230 void ConstructNoiseStripes_C(const void* grain_buffer, int grain_seed,
231 int width, int height, int subsampling_x,
232 int subsampling_y, void* noise_stripes_buffer) {
233 auto* noise_stripes =
234 static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
235 const auto* grain = static_cast<const GrainType*>(grain_buffer);
236 const int half_width = DivideBy2(width + 1);
237 const int half_height = DivideBy2(height + 1);
238 assert(half_width > 0);
239 assert(half_height > 0);
240 static_assert(kLumaWidth == kMaxChromaWidth,
241 "kLumaWidth width should be equal to kMaxChromaWidth");
242 const int grain_width =
243 (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
244 const int plane_width = (width + subsampling_x) >> subsampling_x;
245 constexpr int kNoiseStripeHeight = 34;
246 int luma_num = 0;
247 int y = 0;
248 do {
249 GrainType* const noise_stripe = (*noise_stripes)[luma_num];
250 uint16_t seed = grain_seed;
251 seed ^= ((luma_num * 37 + 178) & 255) << 8;
252 seed ^= ((luma_num * 173 + 105) & 255);
253 int x = 0;
254 do {
255 const int rand = GetFilmGrainRandomNumber(8, &seed);
256 const int offset_x = rand >> 4;
257 const int offset_y = rand & 15;
258 const int plane_offset_x =
259 (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
260 const int plane_offset_y =
261 (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
262 int i = 0;
263 do {
264 // Section 7.18.3.5 says:
265 // noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
266 // wide (a few additional samples across are actually written to
267 // the array, but these are never read) ...
268 //
269 // Note: The warning in the parentheses also applies to
270 // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
271 //
272 // Writes beyond the width of each row could happen below. To
273 // prevent those writes, we clip the number of pixels to copy against
274 // the remaining width.
275 // TODO(petersonab): Allocate aligned stripes with extra width to cover
276 // the size of the final stripe block, then remove this call to min.
277 const int copy_size =
278 std::min(kNoiseStripeHeight >> subsampling_x,
279 plane_width - (x << (1 - subsampling_x)));
280 memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x))],
281 &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
282 copy_size * sizeof(noise_stripe[0]));
283 } while (++i < (kNoiseStripeHeight >> subsampling_y));
284 x += 16;
285 } while (x < half_width);
286
287 ++luma_num;
288 y += 16;
289 } while (y < half_height);
290 }
291
292 // This implementation is for the condition overlap_flag == true.
293 template <int bitdepth, typename GrainType>
ConstructNoiseStripesWithOverlap_C(const void * grain_buffer,int grain_seed,int width,int height,int subsampling_x,int subsampling_y,void * noise_stripes_buffer)294 void ConstructNoiseStripesWithOverlap_C(const void* grain_buffer,
295 int grain_seed, int width, int height,
296 int subsampling_x, int subsampling_y,
297 void* noise_stripes_buffer) {
298 auto* noise_stripes =
299 static_cast<Array2DView<GrainType>*>(noise_stripes_buffer);
300 const auto* grain = static_cast<const GrainType*>(grain_buffer);
301 const int half_width = DivideBy2(width + 1);
302 const int half_height = DivideBy2(height + 1);
303 assert(half_width > 0);
304 assert(half_height > 0);
305 static_assert(kLumaWidth == kMaxChromaWidth,
306 "kLumaWidth width should be equal to kMaxChromaWidth");
307 const int grain_width =
308 (subsampling_x == 0) ? kMaxChromaWidth : kMinChromaWidth;
309 const int plane_width = (width + subsampling_x) >> subsampling_x;
310 constexpr int kNoiseStripeHeight = 34;
311 int luma_num = 0;
312 int y = 0;
313 do {
314 GrainType* const noise_stripe = (*noise_stripes)[luma_num];
315 uint16_t seed = grain_seed;
316 seed ^= ((luma_num * 37 + 178) & 255) << 8;
317 seed ^= ((luma_num * 173 + 105) & 255);
318 // Begin special iteration for x == 0.
319 const int rand = GetFilmGrainRandomNumber(8, &seed);
320 const int offset_x = rand >> 4;
321 const int offset_y = rand & 15;
322 const int plane_offset_x =
323 (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
324 const int plane_offset_y =
325 (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
326 // The overlap computation only occurs when x > 0, so it is omitted here.
327 int i = 0;
328 do {
329 // TODO(petersonab): Allocate aligned stripes with extra width to cover
330 // the size of the final stripe block, then remove this call to min.
331 const int copy_size =
332 std::min(kNoiseStripeHeight >> subsampling_x, plane_width);
333 memcpy(&noise_stripe[i * plane_width],
334 &grain[(plane_offset_y + i) * grain_width + plane_offset_x],
335 copy_size * sizeof(noise_stripe[0]));
336 } while (++i < (kNoiseStripeHeight >> subsampling_y));
337 // End special iteration for x == 0.
338 for (int x = 16; x < half_width; x += 16) {
339 const int rand = GetFilmGrainRandomNumber(8, &seed);
340 const int offset_x = rand >> 4;
341 const int offset_y = rand & 15;
342 const int plane_offset_x =
343 (subsampling_x != 0) ? 6 + offset_x : 9 + offset_x * 2;
344 const int plane_offset_y =
345 (subsampling_y != 0) ? 6 + offset_y : 9 + offset_y * 2;
346 int i = 0;
347 do {
348 int j = 0;
349 int grain_sample =
350 grain[(plane_offset_y + i) * grain_width + plane_offset_x];
351 // The first pixel(s) of each segment of the noise_stripe are subject to
352 // the "overlap" computation.
353 if (subsampling_x == 0) {
354 // Corresponds to the line in the spec:
355 // if (j < 2 && x > 0)
356 // j = 0
357 int old = noise_stripe[i * plane_width + x * 2];
358 grain_sample = old * 27 + grain_sample * 17;
359 grain_sample =
360 Clip3(RightShiftWithRounding(grain_sample, 5),
361 GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
362 noise_stripe[i * plane_width + x * 2] = grain_sample;
363
364 // This check prevents overwriting for the iteration j = 1. The
365 // continue applies to the i-loop.
366 if (x * 2 + 1 >= plane_width) continue;
367 // j = 1
368 grain_sample =
369 grain[(plane_offset_y + i) * grain_width + plane_offset_x + 1];
370 old = noise_stripe[i * plane_width + x * 2 + 1];
371 grain_sample = old * 17 + grain_sample * 27;
372 grain_sample =
373 Clip3(RightShiftWithRounding(grain_sample, 5),
374 GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
375 noise_stripe[i * plane_width + x * 2 + 1] = grain_sample;
376 j = 2;
377 } else {
378 // Corresponds to the line in the spec:
379 // if (j == 0 && x > 0)
380 const int old = noise_stripe[i * plane_width + x];
381 grain_sample = old * 23 + grain_sample * 22;
382 grain_sample =
383 Clip3(RightShiftWithRounding(grain_sample, 5),
384 GetGrainMin<bitdepth>(), GetGrainMax<bitdepth>());
385 noise_stripe[i * plane_width + x] = grain_sample;
386 j = 1;
387 }
388 // The following covers the rest of the loop over j as described in the
389 // spec.
390 //
391 // Section 7.18.3.5 says:
392 // noiseStripe[ lumaNum ][ 0 ] is 34 samples high and w samples
393 // wide (a few additional samples across are actually written to
394 // the array, but these are never read) ...
395 //
396 // Note: The warning in the parentheses also applies to
397 // noiseStripe[ lumaNum ][ 1 ] and noiseStripe[ lumaNum ][ 2 ].
398 //
399 // Writes beyond the width of each row could happen below. To
400 // prevent those writes, we clip the number of pixels to copy against
401 // the remaining width.
402 // TODO(petersonab): Allocate aligned stripes with extra width to cover
403 // the size of the final stripe block, then remove this call to min.
404 const int copy_size =
405 std::min(kNoiseStripeHeight >> subsampling_x,
406 plane_width - (x << (1 - subsampling_x))) -
407 j;
408 memcpy(&noise_stripe[i * plane_width + (x << (1 - subsampling_x)) + j],
409 &grain[(plane_offset_y + i) * grain_width + plane_offset_x + j],
410 copy_size * sizeof(noise_stripe[0]));
411 } while (++i < (kNoiseStripeHeight >> subsampling_y));
412 }
413
414 ++luma_num;
415 y += 16;
416 } while (y < half_height);
417 }
418
419 template <int bitdepth, typename GrainType>
WriteOverlapLine_C(const GrainType * noise_stripe_row,const GrainType * noise_stripe_row_prev,int plane_width,int grain_coeff,int old_coeff,GrainType * noise_image_row)420 inline void WriteOverlapLine_C(const GrainType* noise_stripe_row,
421 const GrainType* noise_stripe_row_prev,
422 int plane_width, int grain_coeff, int old_coeff,
423 GrainType* noise_image_row) {
424 int x = 0;
425 do {
426 int grain = noise_stripe_row[x];
427 const int old = noise_stripe_row_prev[x];
428 grain = old * old_coeff + grain * grain_coeff;
429 grain = Clip3(RightShiftWithRounding(grain, 5), GetGrainMin<bitdepth>(),
430 GetGrainMax<bitdepth>());
431 noise_image_row[x] = grain;
432 } while (++x < plane_width);
433 }
434
435 template <int bitdepth, typename GrainType>
ConstructNoiseImageOverlap_C(const void * noise_stripes_buffer,int width,int height,int subsampling_x,int subsampling_y,void * noise_image_buffer)436 void ConstructNoiseImageOverlap_C(const void* noise_stripes_buffer, int width,
437 int height, int subsampling_x,
438 int subsampling_y, void* noise_image_buffer) {
439 const auto* noise_stripes =
440 static_cast<const Array2DView<GrainType>*>(noise_stripes_buffer);
441 auto* noise_image = static_cast<Array2D<GrainType>*>(noise_image_buffer);
442 const int plane_width = (width + subsampling_x) >> subsampling_x;
443 const int plane_height = (height + subsampling_y) >> subsampling_y;
444 const int stripe_height = 32 >> subsampling_y;
445 const int stripe_mask = stripe_height - 1;
446 int y = stripe_height;
447 int luma_num = 1;
448 if (subsampling_y == 0) {
449 // Begin complete stripes section. This is when we are guaranteed to have
450 // two overlap rows in each stripe.
451 for (; y < (plane_height & ~stripe_mask); ++luma_num, y += stripe_height) {
452 const GrainType* noise_stripe = (*noise_stripes)[luma_num];
453 const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
454 // First overlap row.
455 WriteOverlapLine_C<bitdepth>(noise_stripe,
456 &noise_stripe_prev[32 * plane_width],
457 plane_width, 17, 27, (*noise_image)[y]);
458 // Second overlap row.
459 WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
460 &noise_stripe_prev[(32 + 1) * plane_width],
461 plane_width, 27, 17, (*noise_image)[y + 1]);
462 }
463 // End complete stripes section.
464
465 const int remaining_height = plane_height - y;
466 // Either one partial stripe remains (remaining_height > 0),
467 // OR image is less than one stripe high (remaining_height < 0),
468 // OR all stripes are completed (remaining_height == 0).
469 if (remaining_height <= 0) {
470 return;
471 }
472 const GrainType* noise_stripe = (*noise_stripes)[luma_num];
473 const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
474 WriteOverlapLine_C<bitdepth>(noise_stripe,
475 &noise_stripe_prev[32 * plane_width],
476 plane_width, 17, 27, (*noise_image)[y]);
477
478 // Check if second overlap row is in the image.
479 if (remaining_height > 1) {
480 WriteOverlapLine_C<bitdepth>(&noise_stripe[plane_width],
481 &noise_stripe_prev[(32 + 1) * plane_width],
482 plane_width, 27, 17, (*noise_image)[y + 1]);
483 }
484 } else { // |subsampling_y| == 1
485 // No special checks needed for partial stripes, because if one exists, the
486 // first and only overlap row is guaranteed to exist.
487 for (; y < plane_height; ++luma_num, y += stripe_height) {
488 const GrainType* noise_stripe = (*noise_stripes)[luma_num];
489 const GrainType* noise_stripe_prev = (*noise_stripes)[luma_num - 1];
490 WriteOverlapLine_C<bitdepth>(noise_stripe,
491 &noise_stripe_prev[16 * plane_width],
492 plane_width, 22, 23, (*noise_image)[y]);
493 }
494 }
495 }
496
497 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageLuma_C(const void * noise_image_ptr,int min_value,int max_luma,int scaling_shift,int width,int height,int start_height,const uint8_t scaling_lut_y[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,void * dest_plane_y,ptrdiff_t dest_stride_y)498 void BlendNoiseWithImageLuma_C(
499 const void* noise_image_ptr, int min_value, int max_luma, int scaling_shift,
500 int width, int height, int start_height,
501 const uint8_t scaling_lut_y[kScalingLookupTableSize],
502 const void* source_plane_y, ptrdiff_t source_stride_y, void* dest_plane_y,
503 ptrdiff_t dest_stride_y) {
504 const auto* noise_image =
505 static_cast<const Array2D<GrainType>*>(noise_image_ptr);
506 const auto* in_y = static_cast<const Pixel*>(source_plane_y);
507 source_stride_y /= sizeof(Pixel);
508 auto* out_y = static_cast<Pixel*>(dest_plane_y);
509 dest_stride_y /= sizeof(Pixel);
510
511 int y = 0;
512 do {
513 int x = 0;
514 do {
515 const int orig = in_y[y * source_stride_y + x];
516 int noise = noise_image[kPlaneY][y + start_height][x];
517 noise = RightShiftWithRounding(
518 ScaleLut<bitdepth>(scaling_lut_y, orig) * noise, scaling_shift);
519 out_y[y * dest_stride_y + x] = Clip3(orig + noise, min_value, max_luma);
520 } while (++x < width);
521 } while (++y < height);
522 }
523
524 // This function is for the case params_.chroma_scaling_from_luma == false.
525 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageChroma_C(Plane plane,const FilmGrainParams & params,const void * noise_image_ptr,int min_value,int max_chroma,int width,int height,int start_height,int subsampling_x,int subsampling_y,const uint8_t scaling_lut_uv[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,const void * source_plane_uv,ptrdiff_t source_stride_uv,void * dest_plane_uv,ptrdiff_t dest_stride_uv)526 void BlendNoiseWithImageChroma_C(
527 Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
528 int min_value, int max_chroma, int width, int height, int start_height,
529 int subsampling_x, int subsampling_y,
530 const uint8_t scaling_lut_uv[kScalingLookupTableSize],
531 const void* source_plane_y, ptrdiff_t source_stride_y,
532 const void* source_plane_uv, ptrdiff_t source_stride_uv,
533 void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
534 const auto* noise_image =
535 static_cast<const Array2D<GrainType>*>(noise_image_ptr);
536
537 const int chroma_width = (width + subsampling_x) >> subsampling_x;
538 const int chroma_height = (height + subsampling_y) >> subsampling_y;
539
540 const auto* in_y = static_cast<const Pixel*>(source_plane_y);
541 source_stride_y /= sizeof(Pixel);
542 const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
543 source_stride_uv /= sizeof(Pixel);
544 auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
545 dest_stride_uv /= sizeof(Pixel);
546
547 const int offset = (plane == kPlaneU) ? params.u_offset : params.v_offset;
548 const int luma_multiplier =
549 (plane == kPlaneU) ? params.u_luma_multiplier : params.v_luma_multiplier;
550 const int multiplier =
551 (plane == kPlaneU) ? params.u_multiplier : params.v_multiplier;
552
553 const int scaling_shift = params.chroma_scaling;
554 start_height >>= subsampling_y;
555 int y = 0;
556 do {
557 int x = 0;
558 do {
559 const int luma_x = x << subsampling_x;
560 const int luma_y = y << subsampling_y;
561 const int luma_next_x = std::min(luma_x + 1, width - 1);
562 int average_luma;
563 if (subsampling_x != 0) {
564 average_luma = RightShiftWithRounding(
565 in_y[luma_y * source_stride_y + luma_x] +
566 in_y[luma_y * source_stride_y + luma_next_x],
567 1);
568 } else {
569 average_luma = in_y[luma_y * source_stride_y + luma_x];
570 }
571 const int orig = in_uv[y * source_stride_uv + x];
572 const int combined = average_luma * luma_multiplier + orig * multiplier;
573 const int merged =
574 Clip3((combined >> 6) + LeftShift(offset, bitdepth - 8), 0,
575 (1 << bitdepth) - 1);
576 int noise = noise_image[plane][y + start_height][x];
577 noise = RightShiftWithRounding(
578 ScaleLut<bitdepth>(scaling_lut_uv, merged) * noise, scaling_shift);
579 out_uv[y * dest_stride_uv + x] =
580 Clip3(orig + noise, min_value, max_chroma);
581 } while (++x < chroma_width);
582 } while (++y < chroma_height);
583 }
584
585 // This function is for the case params_.chroma_scaling_from_luma == true.
586 // This further implies that scaling_lut_u == scaling_lut_v == scaling_lut_y.
587 template <int bitdepth, typename GrainType, typename Pixel>
BlendNoiseWithImageChromaWithCfl_C(Plane plane,const FilmGrainParams & params,const void * noise_image_ptr,int min_value,int max_chroma,int width,int height,int start_height,int subsampling_x,int subsampling_y,const uint8_t scaling_lut[kScalingLookupTableSize],const void * source_plane_y,ptrdiff_t source_stride_y,const void * source_plane_uv,ptrdiff_t source_stride_uv,void * dest_plane_uv,ptrdiff_t dest_stride_uv)588 void BlendNoiseWithImageChromaWithCfl_C(
589 Plane plane, const FilmGrainParams& params, const void* noise_image_ptr,
590 int min_value, int max_chroma, int width, int height, int start_height,
591 int subsampling_x, int subsampling_y,
592 const uint8_t scaling_lut[kScalingLookupTableSize],
593 const void* source_plane_y, ptrdiff_t source_stride_y,
594 const void* source_plane_uv, ptrdiff_t source_stride_uv,
595 void* dest_plane_uv, ptrdiff_t dest_stride_uv) {
596 const auto* noise_image =
597 static_cast<const Array2D<GrainType>*>(noise_image_ptr);
598 const auto* in_y = static_cast<const Pixel*>(source_plane_y);
599 source_stride_y /= sizeof(Pixel);
600 const auto* in_uv = static_cast<const Pixel*>(source_plane_uv);
601 source_stride_uv /= sizeof(Pixel);
602 auto* out_uv = static_cast<Pixel*>(dest_plane_uv);
603 dest_stride_uv /= sizeof(Pixel);
604
605 const int chroma_width = (width + subsampling_x) >> subsampling_x;
606 const int chroma_height = (height + subsampling_y) >> subsampling_y;
607 const int scaling_shift = params.chroma_scaling;
608 start_height >>= subsampling_y;
609 int y = 0;
610 do {
611 int x = 0;
612 do {
613 const int luma_x = x << subsampling_x;
614 const int luma_y = y << subsampling_y;
615 const int luma_next_x = std::min(luma_x + 1, width - 1);
616 int average_luma;
617 if (subsampling_x != 0) {
618 average_luma = RightShiftWithRounding(
619 in_y[luma_y * source_stride_y + luma_x] +
620 in_y[luma_y * source_stride_y + luma_next_x],
621 1);
622 } else {
623 average_luma = in_y[luma_y * source_stride_y + luma_x];
624 }
625 const int orig_uv = in_uv[y * source_stride_uv + x];
626 int noise_uv = noise_image[plane][y + start_height][x];
627 noise_uv = RightShiftWithRounding(
628 ScaleLut<bitdepth>(scaling_lut, average_luma) * noise_uv,
629 scaling_shift);
630 out_uv[y * dest_stride_uv + x] =
631 Clip3(orig_uv + noise_uv, min_value, max_chroma);
632 } while (++x < chroma_width);
633 } while (++y < chroma_height);
634 }
635
Init8bpp()636 void Init8bpp() {
637 Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
638 assert(dsp != nullptr);
639 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
640 // LumaAutoRegressionFunc
641 dsp->film_grain.luma_auto_regression[0] =
642 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
643 dsp->film_grain.luma_auto_regression[1] =
644 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
645 dsp->film_grain.luma_auto_regression[2] =
646 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
647
648 // ChromaAutoRegressionFunc
649 // Chroma autoregression should never be called when lag is 0 and use_luma is
650 // false.
651 dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
652 dsp->film_grain.chroma_auto_regression[0][1] =
653 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
654 dsp->film_grain.chroma_auto_regression[0][2] =
655 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
656 dsp->film_grain.chroma_auto_regression[0][3] =
657 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
658 dsp->film_grain.chroma_auto_regression[1][0] =
659 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
660 dsp->film_grain.chroma_auto_regression[1][1] =
661 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
662 dsp->film_grain.chroma_auto_regression[1][2] =
663 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
664 dsp->film_grain.chroma_auto_regression[1][3] =
665 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
666
667 // ConstructNoiseStripesFunc
668 dsp->film_grain.construct_noise_stripes[0] =
669 ConstructNoiseStripes_C<8, int8_t>;
670 dsp->film_grain.construct_noise_stripes[1] =
671 ConstructNoiseStripesWithOverlap_C<8, int8_t>;
672
673 // ConstructNoiseImageOverlapFunc
674 dsp->film_grain.construct_noise_image_overlap =
675 ConstructNoiseImageOverlap_C<8, int8_t>;
676
677 // InitializeScalingLutFunc
678 dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
679
680 // BlendNoiseWithImageLumaFunc
681 dsp->film_grain.blend_noise_luma =
682 BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
683
684 // BlendNoiseWithImageChromaFunc
685 dsp->film_grain.blend_noise_chroma[0] =
686 BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
687 dsp->film_grain.blend_noise_chroma[1] =
688 BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
689 #else // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
690 static_cast<void>(dsp);
691 #ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionLuma
692 dsp->film_grain.luma_auto_regression[0] =
693 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
694 dsp->film_grain.luma_auto_regression[1] =
695 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
696 dsp->film_grain.luma_auto_regression[2] =
697 ApplyAutoRegressiveFilterToLumaGrain_C<8, int8_t>;
698 #endif
699 #ifndef LIBGAV1_Dsp8bpp_FilmGrainAutoregressionChroma
700 // Chroma autoregression should never be called when lag is 0 and use_luma is
701 // false.
702 dsp->film_grain.chroma_auto_regression[0][0] = nullptr;
703 dsp->film_grain.chroma_auto_regression[0][1] =
704 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, false>;
705 dsp->film_grain.chroma_auto_regression[0][2] =
706 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, false>;
707 dsp->film_grain.chroma_auto_regression[0][3] =
708 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, false>;
709 dsp->film_grain.chroma_auto_regression[1][0] =
710 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 0, true>;
711 dsp->film_grain.chroma_auto_regression[1][1] =
712 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 1, true>;
713 dsp->film_grain.chroma_auto_regression[1][2] =
714 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 2, true>;
715 dsp->film_grain.chroma_auto_regression[1][3] =
716 ApplyAutoRegressiveFilterToChromaGrains_C<8, int8_t, 3, true>;
717 #endif
718 #ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseStripes
719 dsp->film_grain.construct_noise_stripes[0] =
720 ConstructNoiseStripes_C<8, int8_t>;
721 dsp->film_grain.construct_noise_stripes[1] =
722 ConstructNoiseStripesWithOverlap_C<8, int8_t>;
723 #endif
724 #ifndef LIBGAV1_Dsp8bpp_FilmGrainConstructNoiseImageOverlap
725 dsp->film_grain.construct_noise_image_overlap =
726 ConstructNoiseImageOverlap_C<8, int8_t>;
727 #endif
728 #ifndef LIBGAV1_Dsp8bpp_FilmGrainInitializeScalingLutFunc
729 dsp->film_grain.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
730 #endif
731 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseLuma
732 dsp->film_grain.blend_noise_luma =
733 BlendNoiseWithImageLuma_C<8, int8_t, uint8_t>;
734 #endif
735 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChroma
736 dsp->film_grain.blend_noise_chroma[0] =
737 BlendNoiseWithImageChroma_C<8, int8_t, uint8_t>;
738 #endif
739 #ifndef LIBGAV1_Dsp8bpp_FilmGrainBlendNoiseChromaWithCfl
740 dsp->film_grain.blend_noise_chroma[1] =
741 BlendNoiseWithImageChromaWithCfl_C<8, int8_t, uint8_t>;
742 #endif
743 #endif // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
744 }
745
#if LIBGAV1_MAX_BITDEPTH >= 10
// Fills the film grain section of the writable DSP table requested for
// bitdepth 10 with the C implementations from this file.
//
// With LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS set, every entry is written.
// Otherwise a group of entries is written only when its
// LIBGAV1_Dsp10bpp_FilmGrain* macro is not defined.
void Init10bpp() {
  Dsp* const table = dsp_internal::GetWritableDspTable(10);
  assert(table != nullptr);
  auto& funcs = table->film_grain;
  // Every group below can be compiled out; keep |funcs| referenced so that
  // configuration does not produce an unused variable warning.
  static_cast<void>(funcs);

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainAutoregressionLuma)
  // Slots 0 through 2 of the luma table all share a single implementation.
  for (int slot = 0; slot < 3; ++slot) {
    funcs.luma_auto_regression[slot] =
        ApplyAutoRegressiveFilterToLumaGrain_C<10, int16_t>;
  }
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainAutoregressionChroma)
  // The last two template arguments mirror the table indices: the boolean
  // selects the row and the integer selects the column.
  auto& without_luma = funcs.chroma_auto_regression[0];
  auto& with_luma = funcs.chroma_auto_regression[1];
  // Chroma autoregression should never be called when lag is 0 and use_luma
  // is false, so that slot is left empty.
  without_luma[0] = nullptr;
  without_luma[1] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, false>;
  without_luma[2] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, false>;
  without_luma[3] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, false>;
  with_luma[0] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 0, true>;
  with_luma[1] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 1, true>;
  with_luma[2] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 2, true>;
  with_luma[3] =
      ApplyAutoRegressiveFilterToChromaGrains_C<10, int16_t, 3, true>;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseStripes)
  // Index 0 is the plain variant, index 1 the "with overlap" variant.
  funcs.construct_noise_stripes[0] = ConstructNoiseStripes_C<10, int16_t>;
  funcs.construct_noise_stripes[1] =
      ConstructNoiseStripesWithOverlap_C<10, int16_t>;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainConstructNoiseImageOverlap)
  funcs.construct_noise_image_overlap =
      ConstructNoiseImageOverlap_C<10, int16_t>;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainInitializeScalingLutFunc)
  // The scaling lookup table initializer is instantiated with 0 rather than
  // with the bitdepth.
  funcs.initialize_scaling_lut = InitializeScalingLookupTable_C<0>;
#endif

#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseLuma)
  funcs.blend_noise_luma = BlendNoiseWithImageLuma_C<10, int16_t, uint16_t>;
#endif

  // Chroma blending: index 0 is the regular variant, index 1 the
  // "with CfL" variant. Each has its own override macro.
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChroma)
  funcs.blend_noise_chroma[0] =
      BlendNoiseWithImageChroma_C<10, int16_t, uint16_t>;
#endif
#if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS || \
    !defined(LIBGAV1_Dsp10bpp_FilmGrainBlendNoiseChromaWithCfl)
  funcs.blend_noise_chroma[1] =
      BlendNoiseWithImageChromaWithCfl_C<10, int16_t, uint16_t>;
#endif
}
#endif  // LIBGAV1_MAX_BITDEPTH >= 10
858
859 } // namespace
860 } // namespace film_grain
861
FilmGrainInit_C()862 void FilmGrainInit_C() {
863 film_grain::Init8bpp();
864 #if LIBGAV1_MAX_BITDEPTH >= 10
865 film_grain::Init10bpp();
866 #endif
867 }
868
869 } // namespace dsp
870 } // namespace libgav1
871