1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "src/dsp/loop_filter.h"
16 
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 
22 #include "src/dsp/dsp.h"
23 #include "src/utils/common.h"
24 
25 namespace libgav1 {
26 namespace dsp {
27 namespace {
28 
// 7.14.6.1.
// C reference loop filters for one (|bitdepth|, |Pixel|) pairing. The struct
// cannot be instantiated; it only groups the per-bitdepth constants with the
// static entry points. Every entry point takes the same arguments: a pointer
// to the first sample on the far side of the edge, the stride, and the outer,
// inner and high-edge-variance thresholds.
template <int bitdepth, typename Pixel>
struct LoopFilterFuncs_C {
  LoopFilterFuncs_C() = delete;

  // Largest unsigned sample value at |bitdepth|.
  static constexpr int kMaxPixel = (1 << bitdepth) - 1;
  // Limits applied when clamping signed filter terms.
  static constexpr int kMaxSignedPixel = (1 << (bitdepth - 1)) - 1;
  static constexpr int kMinSignedPixel = -kMaxSignedPixel - 1;
  // Flatness threshold: 1 at 8 bits, shifted up for deeper samples.
  static constexpr int kFlatThresh = 1 << (bitdepth - 8);

  // Filters whose samples are taken along a row.
  static void Vertical4(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Vertical6(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Vertical8(void* dest, ptrdiff_t stride, int outer_thresh,
                        int inner_thresh, int hev_thresh);
  static void Vertical14(void* dest, ptrdiff_t stride, int outer_thresh,
                         int inner_thresh, int hev_thresh);

  // Filters whose samples are taken |stride| apart.
  static void Horizontal4(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Horizontal6(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Horizontal8(void* dest, ptrdiff_t stride, int outer_thresh,
                          int inner_thresh, int hev_thresh);
  static void Horizontal14(void* dest, ptrdiff_t stride, int outer_thresh,
                           int inner_thresh, int hev_thresh);
};
56 
// Rescales the three thresholds in place from 8-bit units to |bitdepth| by
// shifting each one left by (bitdepth - 8). At 8 bits the values are unchanged.
inline void AdjustThresholds(const int bitdepth, int* const outer_thresh,
                             int* const inner_thresh, int* const hev_thresh) {
  const int shift = bitdepth - 8;
  *outer_thresh = *outer_thresh << shift;
  *inner_thresh = *inner_thresh << shift;
  *hev_thresh = *hev_thresh << shift;
}
63 
64 //------------------------------------------------------------------------------
65 // 4-tap filters
66 
// 7.14.6.2.
// Filter mask for the 4-sample case. |p| points at q0 and neighbouring samples
// are |step| elements apart. Returns true when both one-sided differences
// (|p1 - p0|, |q1 - q0|) are within |inner_thresh| and the cross-edge measure
// 2 * |p0 - q0| + |p1 - q1| / 2 is within |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter4(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];

  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;

  const int edge_measure = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_measure <= outer_thresh;
}
77 
// 7.14.6.2.
// High edge variance test. |p| points at q0 and neighbouring samples are
// |step| elements apart. Returns true when |p1 - p0| or |q1 - q0| is strictly
// greater than |thresh|.
template <typename Pixel>
inline bool Hev(const Pixel* p, ptrdiff_t step, int thresh) {
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int p_delta = std::abs(p1 - p0);
  const int q_delta = std::abs(q1 - q0);
  return !(p_delta <= thresh && q_delta <= thresh);
}
84 
85 // 7.14.6.3.
86 // 4 pixels in, 2 pixels out.
87 template <int bitdepth, typename Pixel>
Filter2_C(Pixel * p,ptrdiff_t step)88 inline void Filter2_C(Pixel* p, ptrdiff_t step) {
89   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
90   const int min_signed_val =
91       LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
92   const int max_signed_val =
93       LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
94   // 8bpp: [-893,892], 10bpp: [-3581,3580], 12bpp [-14333,14332]
95   const int a = 3 * (q0 - p0) + Clip3(p1 - q1, min_signed_val, max_signed_val);
96   // 8bpp: [-16,15], 10bpp: [-64,63], 12bpp: [-256,255]
97   const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
98   const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
99   const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
100   p[-step] = Clip3(p0 + a2, 0, max_unsigned_val);
101   p[0] = Clip3(q0 - a1, 0, max_unsigned_val);
102 }
103 
104 // 7.14.6.3.
105 // 4 pixels in, 4 pixels out.
106 template <int bitdepth, typename Pixel>
Filter4_C(Pixel * p,ptrdiff_t step)107 inline void Filter4_C(Pixel* p, ptrdiff_t step) {
108   const int p1 = p[-2 * step], p0 = p[-step], q0 = p[0], q1 = p[step];
109   const int a = 3 * (q0 - p0);
110   const int min_signed_val =
111       LoopFilterFuncs_C<bitdepth, Pixel>::kMinSignedPixel;
112   const int max_signed_val =
113       LoopFilterFuncs_C<bitdepth, Pixel>::kMaxSignedPixel;
114   const int a1 = Clip3(a + 4, min_signed_val, max_signed_val) >> 3;
115   const int a2 = Clip3(a + 3, min_signed_val, max_signed_val) >> 3;
116   const int a3 = (a1 + 1) >> 1;
117   const int max_unsigned_val = LoopFilterFuncs_C<bitdepth, Pixel>::kMaxPixel;
118   p[-2 * step] = Clip3(p1 + a3, 0, max_unsigned_val);
119   p[-1 * step] = Clip3(p0 + a2, 0, max_unsigned_val);
120   p[0 * step] = Clip3(q0 - a1, 0, max_unsigned_val);
121   p[1 * step] = Clip3(q1 - a3, 0, max_unsigned_val);
122 }
123 
124 template <int bitdepth, typename Pixel>
Vertical4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)125 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical4(void* dest, ptrdiff_t stride,
126                                                    int outer_thresh,
127                                                    int inner_thresh,
128                                                    int hev_thresh) {
129   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
130   auto* dst = static_cast<Pixel*>(dest);
131   stride /= sizeof(Pixel);
132   for (int i = 0; i < 4; ++i) {
133     if (NeedsFilter4(dst, 1, outer_thresh, inner_thresh)) {
134       if (Hev(dst, 1, hev_thresh)) {
135         Filter2_C<bitdepth>(dst, 1);
136       } else {
137         Filter4_C<bitdepth>(dst, 1);
138       }
139     }
140     dst += stride;
141   }
142 }
143 
144 template <int bitdepth, typename Pixel>
Horizontal4(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)145 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal4(void* dest,
146                                                      ptrdiff_t stride,
147                                                      int outer_thresh,
148                                                      int inner_thresh,
149                                                      int hev_thresh) {
150   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
151   auto* dst = static_cast<Pixel*>(dest);
152   stride /= sizeof(Pixel);
153   for (int i = 0; i < 4; ++i) {
154     if (NeedsFilter4(dst, stride, outer_thresh, inner_thresh)) {
155       if (Hev(dst, stride, hev_thresh)) {
156         Filter2_C<bitdepth>(dst, stride);
157       } else {
158         Filter4_C<bitdepth>(dst, stride);
159       }
160     }
161     ++dst;
162   }
163 }
164 
165 //------------------------------------------------------------------------------
166 // 5-tap (chroma) filters
167 
// 7.14.6.2.
// Filter mask for the 6-sample case. |p| points at q0 and neighbouring samples
// are |step| elements apart. Returns true when every adjacent difference on
// each side (p2..p0 and q0..q2) is within |inner_thresh| and the cross-edge
// measure 2 * |p0 - q0| + |p1 - q1| / 2 is within |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter6(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];

  if (std::abs(p2 - p1) > inner_thresh) return false;
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  if (std::abs(q2 - q1) > inner_thresh) return false;

  const int edge_measure = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_measure <= outer_thresh;
}
180 
// 7.14.6.2.
// Flatness test over 3 samples per side. |p| points at q0 and neighbouring
// samples are |step| elements apart. Returns true when p1 and p2 are within
// |flat_thresh| of p0 and q1 and q2 are within |flat_thresh| of q0.
template <typename Pixel>
inline bool IsFlat3(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];

  const bool p_side_flat =
      std::abs(p1 - p0) <= flat_thresh && std::abs(p2 - p0) <= flat_thresh;
  const bool q_side_flat =
      std::abs(q1 - q0) <= flat_thresh && std::abs(q2 - q0) <= flat_thresh;
  return p_side_flat && q_side_flat;
}
189 
190 template <typename Pixel>
ApplyFilter6(int filter_value)191 inline Pixel ApplyFilter6(int filter_value) {
192   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
193 }
194 
195 // 7.14.6.4.
196 // 6 pixels in, 4 pixels out.
197 template <typename Pixel>
Filter6_C(Pixel * p,ptrdiff_t step)198 inline void Filter6_C(Pixel* p, ptrdiff_t step) {
199   const int p2 = p[-3 * step], p1 = p[-2 * step], p0 = p[-step];
200   const int q0 = p[0], q1 = p[step], q2 = p[2 * step];
201   const int a1 = 2 * p1;
202   const int a0 = 2 * p0;
203   const int b0 = 2 * q0;
204   const int b1 = 2 * q1;
205   // The max is 8 * max_pixel + 4 for the rounder.
206   // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
207   p[-2 * step] = ApplyFilter6<Pixel>(3 * p2 + a1 + a0 + q0);
208   p[-1 * step] = ApplyFilter6<Pixel>(p2 + a1 + a0 + b0 + q1);
209   p[0 * step] = ApplyFilter6<Pixel>(p1 + a0 + b0 + b1 + q2);
210   p[1 * step] = ApplyFilter6<Pixel>(p0 + b0 + b1 + 3 * q2);
211 }
212 
213 template <int bitdepth, typename Pixel>
Vertical6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)214 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical6(void* dest, ptrdiff_t stride,
215                                                    int outer_thresh,
216                                                    int inner_thresh,
217                                                    int hev_thresh) {
218   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
219   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
220   auto* dst = static_cast<Pixel*>(dest);
221   stride /= sizeof(Pixel);
222   for (int i = 0; i < 4; ++i) {
223     if (NeedsFilter6(dst, 1, outer_thresh, inner_thresh)) {
224       if (IsFlat3(dst, 1, flat_thresh)) {
225         Filter6_C(dst, 1);
226       } else if (Hev(dst, 1, hev_thresh)) {
227         Filter2_C<bitdepth>(dst, 1);
228       } else {
229         Filter4_C<bitdepth>(dst, 1);
230       }
231     }
232     dst += stride;
233   }
234 }
235 
236 template <int bitdepth, typename Pixel>
Horizontal6(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)237 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal6(void* dest,
238                                                      ptrdiff_t stride,
239                                                      int outer_thresh,
240                                                      int inner_thresh,
241                                                      int hev_thresh) {
242   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
243   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
244   auto* dst = static_cast<Pixel*>(dest);
245   stride /= sizeof(Pixel);
246   for (int i = 0; i < 4; ++i) {
247     if (NeedsFilter6(dst, stride, outer_thresh, inner_thresh)) {
248       if (IsFlat3(dst, stride, flat_thresh)) {
249         Filter6_C(dst, stride);
250       } else if (Hev(dst, stride, hev_thresh)) {
251         Filter2_C<bitdepth>(dst, stride);
252       } else {
253         Filter4_C<bitdepth>(dst, stride);
254       }
255     }
256     ++dst;
257   }
258 }
259 
260 //------------------------------------------------------------------------------
261 // 7-tap filters
262 
// 7.14.6.2.
// Filter mask for the 8-sample case. |p| points at q0 and neighbouring samples
// are |step| elements apart. Returns true when every adjacent difference on
// each side (p3..p0 and q0..q3) is within |inner_thresh| and the cross-edge
// measure 2 * |p0 - q0| + |p1 - q1| / 2 is within |outer_thresh|.
template <typename Pixel>
inline bool NeedsFilter8(const Pixel* p, ptrdiff_t step, int outer_thresh,
                         int inner_thresh) {
  const int p3 = p[-4 * step];
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  const int q3 = p[3 * step];

  if (std::abs(p3 - p2) > inner_thresh) return false;
  if (std::abs(p2 - p1) > inner_thresh) return false;
  if (std::abs(p1 - p0) > inner_thresh) return false;
  if (std::abs(q1 - q0) > inner_thresh) return false;
  if (std::abs(q2 - q1) > inner_thresh) return false;
  if (std::abs(q3 - q2) > inner_thresh) return false;

  const int edge_measure = std::abs(p0 - q0) * 2 + std::abs(p1 - q1) / 2;
  return edge_measure <= outer_thresh;
}
278 
// 7.14.6.2.
// Flatness test over 4 samples per side. |p| points at q0 and neighbouring
// samples are |step| elements apart. Returns true when p1, p2 and p3 are
// within |flat_thresh| of p0 and q1, q2 and q3 are within |flat_thresh| of q0.
template <typename Pixel>
inline bool IsFlat4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p3 = p[-4 * step];
  const int p2 = p[-3 * step];
  const int p1 = p[-2 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q1 = p[step];
  const int q2 = p[2 * step];
  const int q3 = p[3 * step];

  const bool p_side_flat = std::abs(p1 - p0) <= flat_thresh &&
                           std::abs(p2 - p0) <= flat_thresh &&
                           std::abs(p3 - p0) <= flat_thresh;
  const bool q_side_flat = std::abs(q1 - q0) <= flat_thresh &&
                           std::abs(q2 - q0) <= flat_thresh &&
                           std::abs(q3 - q0) <= flat_thresh;
  return p_side_flat && q_side_flat;
}
289 
290 template <typename Pixel>
ApplyFilter8(int filter_value)291 inline Pixel ApplyFilter8(int filter_value) {
292   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 3));
293 }
294 
295 // 7.14.6.4.
296 // 8 pixels in, 6 pixels out.
297 template <typename Pixel>
Filter8_C(Pixel * p,ptrdiff_t step)298 inline void Filter8_C(Pixel* p, ptrdiff_t step) {
299   const int p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
300             p0 = p[-step];
301   const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step];
302   // The max is 8 * max_pixel + 4 for the rounder.
303   // 8bpp: 2044 (11 bits), 10bpp: 8188 (13 bits), 12bpp: 32764 (15 bits)
304   p[-3 * step] = ApplyFilter8<Pixel>(3 * p3 + 2 * p2 + p1 + p0 + q0);
305   p[-2 * step] = ApplyFilter8<Pixel>(2 * p3 + p2 + 2 * p1 + p0 + q0 + q1);
306   p[-1 * step] = ApplyFilter8<Pixel>(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2);
307   p[0 * step] = ApplyFilter8<Pixel>(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3);
308   p[1 * step] = ApplyFilter8<Pixel>(p1 + p0 + q0 + 2 * q1 + q2 + 2 * q3);
309   p[2 * step] = ApplyFilter8<Pixel>(p0 + q0 + q1 + 2 * q2 + 3 * q3);
310 }
311 
312 template <int bitdepth, typename Pixel>
Vertical8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)313 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical8(void* dest, ptrdiff_t stride,
314                                                    int outer_thresh,
315                                                    int inner_thresh,
316                                                    int hev_thresh) {
317   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
318   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
319   auto* dst = static_cast<Pixel*>(dest);
320   stride /= sizeof(Pixel);
321   for (int i = 0; i < 4; ++i) {
322     if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
323       if (IsFlat4(dst, 1, flat_thresh)) {
324         Filter8_C(dst, 1);
325       } else if (Hev(dst, 1, hev_thresh)) {
326         Filter2_C<bitdepth>(dst, 1);
327       } else {
328         Filter4_C<bitdepth>(dst, 1);
329       }
330     }
331     dst += stride;
332   }
333 }
334 
335 template <int bitdepth, typename Pixel>
Horizontal8(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)336 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal8(void* dest,
337                                                      ptrdiff_t stride,
338                                                      int outer_thresh,
339                                                      int inner_thresh,
340                                                      int hev_thresh) {
341   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
342   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
343   auto* dst = static_cast<Pixel*>(dest);
344   stride /= sizeof(Pixel);
345   for (int i = 0; i < 4; ++i) {
346     if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
347       if (IsFlat4(dst, stride, flat_thresh)) {
348         Filter8_C(dst, stride);
349       } else if (Hev(dst, stride, hev_thresh)) {
350         Filter2_C<bitdepth>(dst, stride);
351       } else {
352         Filter4_C<bitdepth>(dst, stride);
353       }
354     }
355     ++dst;
356   }
357 }
358 
359 //------------------------------------------------------------------------------
360 // 13-tap filters
361 
// 7.14.6.2.
// Flatness test over the outer samples of the 14-sample neighbourhood. |p|
// points at q0 and neighbouring samples are |step| elements apart. Returns
// true when p4, p5 and p6 are within |flat_thresh| of p0 and q4, q5 and q6 are
// within |flat_thresh| of q0. The inner samples (p1..p3, q1..q3) are not read.
template <typename Pixel>
inline bool IsFlatOuter4(const Pixel* p, ptrdiff_t step, int flat_thresh) {
  const int p6 = p[-7 * step];
  const int p5 = p[-6 * step];
  const int p4 = p[-5 * step];
  const int p0 = p[-step];
  const int q0 = p[0];
  const int q4 = p[4 * step];
  const int q5 = p[5 * step];
  const int q6 = p[6 * step];

  const bool p_side_flat = std::abs(p4 - p0) <= flat_thresh &&
                           std::abs(p5 - p0) <= flat_thresh &&
                           std::abs(p6 - p0) <= flat_thresh;
  const bool q_side_flat = std::abs(q4 - q0) <= flat_thresh &&
                           std::abs(q5 - q0) <= flat_thresh &&
                           std::abs(q6 - q0) <= flat_thresh;
  return p_side_flat && q_side_flat;
}
372 
373 template <typename Pixel>
ApplyFilter14(int filter_value)374 inline Pixel ApplyFilter14(int filter_value) {
375   return static_cast<Pixel>(RightShiftWithRounding(filter_value, 4));
376 }
377 
378 // 7.14.6.4.
379 // 14 pixels in, 12 pixels out.
380 template <typename Pixel>
Filter14_C(Pixel * p,ptrdiff_t step)381 inline void Filter14_C(Pixel* p, ptrdiff_t step) {
382   const int p6 = p[-7 * step], p5 = p[-6 * step], p4 = p[-5 * step],
383             p3 = p[-4 * step], p2 = p[-3 * step], p1 = p[-2 * step],
384             p0 = p[-step];
385   const int q0 = p[0], q1 = p[step], q2 = p[2 * step], q3 = p[3 * step],
386             q4 = p[4 * step], q5 = p[5 * step], q6 = p[6 * step];
387   // The max is 16 * max_pixel + 8 for the rounder.
388   // 8bpp: 4088 (12 bits), 10bpp: 16376 (14 bits), 12bpp: 65528 (16 bits)
389   p[-6 * step] =
390       ApplyFilter14<Pixel>(p6 * 7 + p5 * 2 + p4 * 2 + p3 + p2 + p1 + p0 + q0);
391   p[-5 * step] = ApplyFilter14<Pixel>(p6 * 5 + p5 * 2 + p4 * 2 + p3 * 2 + p2 +
392                                       p1 + p0 + q0 + q1);
393   p[-4 * step] = ApplyFilter14<Pixel>(p6 * 4 + p5 + p4 * 2 + p3 * 2 + p2 * 2 +
394                                       p1 + p0 + q0 + q1 + q2);
395   p[-3 * step] = ApplyFilter14<Pixel>(p6 * 3 + p5 + p4 + p3 * 2 + p2 * 2 +
396                                       p1 * 2 + p0 + q0 + q1 + q2 + q3);
397   p[-2 * step] = ApplyFilter14<Pixel>(p6 * 2 + p5 + p4 + p3 + p2 * 2 + p1 * 2 +
398                                       p0 * 2 + q0 + q1 + q2 + q3 + q4);
399   p[-1 * step] = ApplyFilter14<Pixel>(p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 * 2 +
400                                       q0 * 2 + q1 + q2 + q3 + q4 + q5);
401   p[0 * step] = ApplyFilter14<Pixel>(p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 * 2 +
402                                      q1 * 2 + q2 + q3 + q4 + q5 + q6);
403   p[1 * step] = ApplyFilter14<Pixel>(p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 * 2 +
404                                      q2 * 2 + q3 + q4 + q5 + q6 * 2);
405   p[2 * step] = ApplyFilter14<Pixel>(p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 * 2 +
406                                      q3 * 2 + q4 + q5 + q6 * 3);
407   p[3 * step] = ApplyFilter14<Pixel>(p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 * 2 +
408                                      q4 * 2 + q5 + q6 * 4);
409   p[4 * step] = ApplyFilter14<Pixel>(p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 * 2 +
410                                      q5 * 2 + q6 * 5);
411   p[5 * step] =
412       ApplyFilter14<Pixel>(p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 * 2 + q6 * 7);
413 }
414 
415 template <int bitdepth, typename Pixel>
Vertical14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)416 void LoopFilterFuncs_C<bitdepth, Pixel>::Vertical14(void* dest,
417                                                     ptrdiff_t stride,
418                                                     int outer_thresh,
419                                                     int inner_thresh,
420                                                     int hev_thresh) {
421   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
422   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
423   auto* dst = static_cast<Pixel*>(dest);
424   stride /= sizeof(Pixel);
425   for (int i = 0; i < 4; ++i) {
426     if (NeedsFilter8(dst, 1, outer_thresh, inner_thresh)) {
427       if (IsFlat4(dst, 1, flat_thresh)) {
428         if (IsFlatOuter4(dst, 1, flat_thresh)) {
429           Filter14_C(dst, 1);
430         } else {
431           Filter8_C(dst, 1);
432         }
433       } else if (Hev(dst, 1, hev_thresh)) {
434         Filter2_C<bitdepth>(dst, 1);
435       } else {
436         Filter4_C<bitdepth>(dst, 1);
437       }
438     }
439     dst += stride;
440   }
441 }
442 
443 template <int bitdepth, typename Pixel>
Horizontal14(void * dest,ptrdiff_t stride,int outer_thresh,int inner_thresh,int hev_thresh)444 void LoopFilterFuncs_C<bitdepth, Pixel>::Horizontal14(void* dest,
445                                                       ptrdiff_t stride,
446                                                       int outer_thresh,
447                                                       int inner_thresh,
448                                                       int hev_thresh) {
449   const int flat_thresh = LoopFilterFuncs_C<bitdepth, Pixel>::kFlatThresh;
450   AdjustThresholds(bitdepth, &outer_thresh, &inner_thresh, &hev_thresh);
451   auto* dst = static_cast<Pixel*>(dest);
452   stride /= sizeof(Pixel);
453   for (int i = 0; i < 4; ++i) {
454     if (NeedsFilter8(dst, stride, outer_thresh, inner_thresh)) {
455       if (IsFlat4(dst, stride, flat_thresh)) {
456         if (IsFlatOuter4(dst, stride, flat_thresh)) {
457           Filter14_C(dst, stride);
458         } else {
459           Filter8_C(dst, stride);
460         }
461       } else if (Hev(dst, stride, hev_thresh)) {
462         Filter2_C<bitdepth>(dst, stride);
463       } else {
464         Filter4_C<bitdepth>(dst, stride);
465       }
466     }
467     ++dst;
468   }
469 }
470 
471 using Defs8bpp = LoopFilterFuncs_C<8, uint8_t>;
472 
Init8bpp()473 void Init8bpp() {
474   Dsp* const dsp = dsp_internal::GetWritableDspTable(8);
475   assert(dsp != nullptr);
476 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
477   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
478       Defs8bpp::Horizontal4;
479   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
480       Defs8bpp::Vertical4;
481 
482   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
483       Defs8bpp::Horizontal6;
484   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
485       Defs8bpp::Vertical6;
486 
487   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
488       Defs8bpp::Horizontal8;
489   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
490       Defs8bpp::Vertical8;
491 
492   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
493       Defs8bpp::Horizontal14;
494   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
495       Defs8bpp::Vertical14;
496 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
497   static_cast<void>(dsp);
498 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeHorizontal
499   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
500       Defs8bpp::Horizontal4;
501 #endif
502 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize4_LoopFilterTypeVertical
503   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
504       Defs8bpp::Vertical4;
505 #endif
506 
507 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeHorizontal
508   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
509       Defs8bpp::Horizontal6;
510 #endif
511 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize6_LoopFilterTypeVertical
512   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
513       Defs8bpp::Vertical6;
514 #endif
515 
516 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeHorizontal
517   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
518       Defs8bpp::Horizontal8;
519 #endif
520 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize8_LoopFilterTypeVertical
521   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
522       Defs8bpp::Vertical8;
523 #endif
524 
525 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeHorizontal
526   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
527       Defs8bpp::Horizontal14;
528 #endif
529 #ifndef LIBGAV1_Dsp8bpp_LoopFilterSize14_LoopFilterTypeVertical
530   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
531       Defs8bpp::Vertical14;
532 #endif
533 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
534 }
535 
536 #if LIBGAV1_MAX_BITDEPTH >= 10
537 using Defs10bpp = LoopFilterFuncs_C<10, uint16_t>;
538 
Init10bpp()539 void Init10bpp() {
540   Dsp* const dsp = dsp_internal::GetWritableDspTable(10);
541   assert(dsp != nullptr);
542 #if LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
543   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
544       Defs10bpp::Horizontal4;
545   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
546       Defs10bpp::Vertical4;
547 
548   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
549       Defs10bpp::Horizontal6;
550   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
551       Defs10bpp::Vertical6;
552 
553   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
554       Defs10bpp::Horizontal8;
555   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
556       Defs10bpp::Vertical8;
557 
558   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
559       Defs10bpp::Horizontal14;
560   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
561       Defs10bpp::Vertical14;
562 #else  // !LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
563   static_cast<void>(dsp);
564 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeHorizontal
565   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeHorizontal] =
566       Defs10bpp::Horizontal4;
567 #endif
568 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize4_LoopFilterTypeVertical
569   dsp->loop_filters[kLoopFilterSize4][kLoopFilterTypeVertical] =
570       Defs10bpp::Vertical4;
571 #endif
572 
573 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeHorizontal
574   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeHorizontal] =
575       Defs10bpp::Horizontal6;
576 #endif
577 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize6_LoopFilterTypeVertical
578   dsp->loop_filters[kLoopFilterSize6][kLoopFilterTypeVertical] =
579       Defs10bpp::Vertical6;
580 #endif
581 
582 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeHorizontal
583   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeHorizontal] =
584       Defs10bpp::Horizontal8;
585 #endif
586 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize8_LoopFilterTypeVertical
587   dsp->loop_filters[kLoopFilterSize8][kLoopFilterTypeVertical] =
588       Defs10bpp::Vertical8;
589 #endif
590 
591 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeHorizontal
592   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeHorizontal] =
593       Defs10bpp::Horizontal14;
594 #endif
595 #ifndef LIBGAV1_Dsp10bpp_LoopFilterSize14_LoopFilterTypeVertical
596   dsp->loop_filters[kLoopFilterSize14][kLoopFilterTypeVertical] =
597       Defs10bpp::Vertical14;
598 #endif
599 #endif  // LIBGAV1_ENABLE_ALL_DSP_FUNCTIONS
600 }
601 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
602 
603 }  // namespace
604 
LoopFilterInit_C()605 void LoopFilterInit_C() {
606   Init8bpp();
607 #if LIBGAV1_MAX_BITDEPTH >= 10
608   Init10bpp();
609 #endif
610   // Local functions that may be unused depending on the optimizations
611   // available.
612   static_cast<void>(AdjustThresholds);
613 }
614 
615 }  // namespace dsp
616 }  // namespace libgav1
617