1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_POST_FILTER_H_
18 #define LIBGAV1_SRC_POST_FILTER_H_
19 
#include <algorithm>
#include <array>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <type_traits>
27 
28 #include "src/dsp/common.h"
29 #include "src/dsp/dsp.h"
30 #include "src/frame_scratch_buffer.h"
31 #include "src/loop_restoration_info.h"
32 #include "src/obu_parser.h"
33 #include "src/utils/array_2d.h"
34 #include "src/utils/block_parameters_holder.h"
35 #include "src/utils/common.h"
36 #include "src/utils/constants.h"
37 #include "src/utils/memory.h"
38 #include "src/utils/threadpool.h"
39 #include "src/yuv_buffer.h"
40 
41 namespace libgav1 {
42 
43 // This class applies in-loop filtering for each frame after it is
44 // reconstructed. The in-loop filtering contains all post processing filtering
45 // for the reconstructed frame, including deblock filter, CDEF, superres,
46 // and loop restoration.
47 // Historically, for example in libaom, loop filter refers to deblock filter.
48 // To avoid name conflicts, we call this class PostFilter (post processing).
49 // In-loop post filtering order is:
50 // deblock --> CDEF --> super resolution--> loop restoration.
// When CDEF and super resolution are not used, we can combine deblock
// and restoration together to filter the frame buffer only once.
53 class PostFilter {
54  public:
55   // This class does not take ownership of the masks/restoration_info, but it
56   // may change their values.
57   //
58   // The overall flow of data in this class (for both single and multi-threaded
59   // cases) is as follows:
60   //   -> Input: |frame_buffer_|.
61   //   -> Initialize |source_buffer_|, |cdef_buffer_|, |superres_buffer_| and
62   //      |loop_restoration_buffer_|.
63   //   -> Deblocking:
64   //      * Input: |source_buffer_|
65   //      * Output: |source_buffer_|
66   //   -> CDEF:
67   //      * Input: |source_buffer_|
68   //      * Output: |cdef_buffer_|
69   //   -> SuperRes:
70   //      * Input: |cdef_buffer_|
71   //      * Output: |superres_buffer_|
72   //   -> Loop Restoration:
73   //      * Input: |superres_buffer_|
74   //      * Output: |loop_restoration_buffer_|.
75   //   -> Now |frame_buffer_| contains the filtered frame.
76   PostFilter(const ObuFrameHeader& frame_header,
77              const ObuSequenceHeader& sequence_header,
78              FrameScratchBuffer* frame_scratch_buffer, YuvBuffer* frame_buffer,
79              const dsp::Dsp* dsp, int do_post_filter_mask);
80 
81   // non copyable/movable.
82   PostFilter(const PostFilter&) = delete;
83   PostFilter& operator=(const PostFilter&) = delete;
84   PostFilter(PostFilter&&) = delete;
85   PostFilter& operator=(PostFilter&&) = delete;
86 
87   // The overall function that applies all post processing filtering with
88   // multiple threads.
89   // * The filtering order is:
90   //   deblock --> CDEF --> super resolution--> loop restoration.
91   // * The output of each filter is the input for the following filter. A
92   //   special case is that loop restoration needs a few rows of the deblocked
93   //   frame and the entire cdef filtered frame:
94   //   deblock --> CDEF --> super resolution --> loop restoration.
95   //              |                                 ^
96   //              |                                 |
97   //              -----------> super resolution -----
98   // * Any of these filters could be present or absent.
99   // * |frame_buffer_| points to the decoded frame buffer. When
100   //   ApplyFilteringThreaded() is called, |frame_buffer_| is modified by each
101   //   of the filters as described below.
102   // Filter behavior (multi-threaded):
103   // * Deblock: In-place filtering. The output is written to |source_buffer_|.
  //            If cdef and loop restoration are both on, then 4 rows (as
  //            specified by |kLoopRestorationBorderRows|) in every 64x64 block
  //            are copied into |loop_restoration_border_|.
107   // * Cdef: In-place filtering. Uses the |source_buffer_| and |cdef_border_| as
108   //         the input and the output is written into |cdef_buffer_| (which is
109   //         the same as |source_buffer_|).
110   // * SuperRes: Near in-place filtering. Uses the |cdef_buffer_| and
111   //             |superres_line_buffer_| as the input and the output is written
112   //             into |superres_buffer_| (which is just |cdef_buffer_| with a
113   //             shift to the top).
114   // * Restoration: Near in-place filtering.
115   //                Uses the |superres_buffer_| and |loop_restoration_border_|
116   //                as the input and the output is written into
117   //                |loop_restoration_buffer_| (which is just |superres_buffer_|
118   //                with a shift to the left).
119   void ApplyFilteringThreaded();
120 
121   // Does the overall post processing filter for one superblock row starting at
122   // |row4x4| with height 4*|sb4x4|. If |do_deblock| is false, deblocking filter
123   // will not be applied.
124   //
125   // Filter behavior (single-threaded):
126   // * Deblock: In-place filtering. The output is written to |source_buffer_|.
  //            If cdef and loop restoration are both on, then 4 rows (as
  //            specified by |kLoopRestorationBorderRows|) in every 64x64 block
  //            are copied into |loop_restoration_border_|.
130   // * Cdef: In-place filtering. The output is written into |cdef_buffer_|
131   //         (which is just |source_buffer_| with a shift to the top-left).
132   // * SuperRes: Near in-place filtering. Uses the |cdef_buffer_| as the input
133   //             and the output is written into |superres_buffer_| (which is
134   //             just |cdef_buffer_| with a shift to the top).
135   // * Restoration: Near in-place filtering.
136   //                Uses the |superres_buffer_| and |loop_restoration_border_|
137   //                as the input and the output is written into
138   //                |loop_restoration_buffer_| (which is just |superres_buffer_|
139   //                with a shift to the left or top-left).
140   // Returns the index of the last row whose post processing is complete and can
141   // be used for referencing.
142   int ApplyFilteringForOneSuperBlockRow(int row4x4, int sb4x4, bool is_last_row,
143                                         bool do_deblock);
144 
145   // Apply deblocking filter in one direction (specified by |loop_filter_type|)
146   // for the superblock row starting at |row4x4_start| for columns starting from
147   // |column4x4_start| in increments of 16 (or 8 for chroma with subsampling)
148   // until the smallest multiple of 16 that is >= |column4x4_end| or until
149   // |frame_header_.columns4x4|, whichever is lower. This function must be
150   // called only if |DoDeblock()| returns true.
151   void ApplyDeblockFilter(LoopFilterType loop_filter_type, int row4x4_start,
152                           int column4x4_start, int column4x4_end, int sb4x4);
153 
DoCdef(const ObuFrameHeader & frame_header,int do_post_filter_mask)154   static bool DoCdef(const ObuFrameHeader& frame_header,
155                      int do_post_filter_mask) {
156     return (frame_header.cdef.bits > 0 ||
157             frame_header.cdef.y_primary_strength[0] > 0 ||
158             frame_header.cdef.y_secondary_strength[0] > 0 ||
159             frame_header.cdef.uv_primary_strength[0] > 0 ||
160             frame_header.cdef.uv_secondary_strength[0] > 0) &&
161            (do_post_filter_mask & 0x02) != 0;
162   }
DoCdef()163   bool DoCdef() const { return DoCdef(frame_header_, do_post_filter_mask_); }
164   // If filter levels for Y plane (0 for vertical, 1 for horizontal),
165   // are all zero, deblock filter will not be applied.
DoDeblock(const ObuFrameHeader & frame_header,uint8_t do_post_filter_mask)166   static bool DoDeblock(const ObuFrameHeader& frame_header,
167                         uint8_t do_post_filter_mask) {
168     return (frame_header.loop_filter.level[0] > 0 ||
169             frame_header.loop_filter.level[1] > 0) &&
170            (do_post_filter_mask & 0x01) != 0;
171   }
DoDeblock()172   bool DoDeblock() const {
173     return DoDeblock(frame_header_, do_post_filter_mask_);
174   }
175 
GetZeroDeltaDeblockFilterLevel(int segment_id,int level_index,ReferenceFrameType type,int mode_id)176   uint8_t GetZeroDeltaDeblockFilterLevel(int segment_id, int level_index,
177                                          ReferenceFrameType type,
178                                          int mode_id) const {
179     return deblock_filter_levels_[segment_id][level_index][type][mode_id];
180   }
181   // Computes the deblock filter levels using |delta_lf| and stores them in
182   // |deblock_filter_levels|.
183   void ComputeDeblockFilterLevels(
184       const int8_t delta_lf[kFrameLfCount],
185       uint8_t deblock_filter_levels[kMaxSegments][kFrameLfCount]
186                                    [kNumReferenceFrameTypes][2]) const;
187   // Returns true if loop restoration will be performed for the given parameters
188   // and mask.
DoRestoration(const LoopRestoration & loop_restoration,uint8_t do_post_filter_mask,int num_planes)189   static bool DoRestoration(const LoopRestoration& loop_restoration,
190                             uint8_t do_post_filter_mask, int num_planes) {
191     if (num_planes == kMaxPlanesMonochrome) {
192       return loop_restoration.type[kPlaneY] != kLoopRestorationTypeNone &&
193              (do_post_filter_mask & 0x08) != 0;
194     }
195     return (loop_restoration.type[kPlaneY] != kLoopRestorationTypeNone ||
196             loop_restoration.type[kPlaneU] != kLoopRestorationTypeNone ||
197             loop_restoration.type[kPlaneV] != kLoopRestorationTypeNone) &&
198            (do_post_filter_mask & 0x08) != 0;
199   }
DoRestoration()200   bool DoRestoration() const {
201     return DoRestoration(loop_restoration_, do_post_filter_mask_, planes_);
202   }
203 
204   // Returns a pointer to the unfiltered buffer. This is used by the Tile class
205   // to determine where to write the output of the tile decoding process taking
206   // in-place filtering offsets into consideration.
GetUnfilteredBuffer(int plane)207   uint8_t* GetUnfilteredBuffer(int plane) { return source_buffer_[plane]; }
frame_buffer()208   const YuvBuffer& frame_buffer() const { return frame_buffer_; }
209 
210   // Returns true if SuperRes will be performed for the given frame header and
211   // mask.
DoSuperRes(const ObuFrameHeader & frame_header,uint8_t do_post_filter_mask)212   static bool DoSuperRes(const ObuFrameHeader& frame_header,
213                          uint8_t do_post_filter_mask) {
214     return frame_header.width != frame_header.upscaled_width &&
215            (do_post_filter_mask & 0x04) != 0;
216   }
DoSuperRes()217   bool DoSuperRes() const {
218     return DoSuperRes(frame_header_, do_post_filter_mask_);
219   }
restoration_info()220   LoopRestorationInfo* restoration_info() const { return restoration_info_; }
GetBufferOffset(uint8_t * base_buffer,int stride,Plane plane,int row,int column)221   uint8_t* GetBufferOffset(uint8_t* base_buffer, int stride, Plane plane,
222                            int row, int column) const {
223     return base_buffer + (row >> subsampling_y_[plane]) * stride +
224            ((column >> subsampling_x_[plane]) << pixel_size_log2_);
225   }
GetSourceBuffer(Plane plane,int row4x4,int column4x4)226   uint8_t* GetSourceBuffer(Plane plane, int row4x4, int column4x4) const {
227     return GetBufferOffset(source_buffer_[plane], frame_buffer_.stride(plane),
228                            plane, MultiplyBy4(row4x4), MultiplyBy4(column4x4));
229   }
GetCdefBuffer(Plane plane,int row4x4,int column4x4)230   uint8_t* GetCdefBuffer(Plane plane, int row4x4, int column4x4) const {
231     return GetBufferOffset(cdef_buffer_[plane], frame_buffer_.stride(plane),
232                            plane, MultiplyBy4(row4x4), MultiplyBy4(column4x4));
233   }
GetSuperResBuffer(Plane plane,int row4x4,int column4x4)234   uint8_t* GetSuperResBuffer(Plane plane, int row4x4, int column4x4) const {
235     return GetBufferOffset(superres_buffer_[plane], frame_buffer_.stride(plane),
236                            plane, MultiplyBy4(row4x4), MultiplyBy4(column4x4));
237   }
238 
239   template <typename Pixel>
240   static void ExtendFrame(Pixel* frame_start, int width, int height,
241                           ptrdiff_t stride, int left, int right, int top,
242                           int bottom);
243 
244  private:
245   // The type of the HorizontalDeblockFilter and VerticalDeblockFilter member
246   // functions.
247   using DeblockFilter = void (PostFilter::*)(int row4x4_start,
248                                              int column4x4_start);
249   // The lookup table for picking the deblock filter, according to deblock
250   // filter type.
251   const DeblockFilter deblock_filter_func_[2] = {
252       &PostFilter::VerticalDeblockFilter, &PostFilter::HorizontalDeblockFilter};
253 
254   // Functions common to all post filters.
255 
256   // Extends the frame by setting the border pixel values to the one from its
257   // closest frame boundary.
258   void ExtendFrameBoundary(uint8_t* frame_start, int width, int height,
259                            ptrdiff_t stride, int left, int right, int top,
260                            int bottom) const;
261   // Extend frame boundary for referencing if the frame will be saved as a
262   // reference frame.
263   void ExtendBordersForReferenceFrame();
264   // Copies the deblocked pixels needed for loop restoration.
265   void CopyDeblockedPixels(Plane plane, int row4x4);
266   // Copies the border for one superblock row. If |for_loop_restoration| is
267   // true, then it assumes that the border extension is being performed for the
268   // input of the loop restoration process. If |for_loop_restoration| is false,
269   // then it assumes that the border extension is being performed for using the
270   // current frame as a reference frame. In this case, |progress_row_| is also
271   // updated.
272   void CopyBordersForOneSuperBlockRow(int row4x4, int sb4x4,
273                                       bool for_loop_restoration);
274   // Sets up the |loop_restoration_border_| for loop restoration.
275   // This is called when there is no CDEF filter. We copy rows from
276   // |superres_buffer_| and do the line extension.
277   void SetupLoopRestorationBorder(int row4x4_start);
278   // This is called when there is CDEF filter. We copy rows from
279   // |source_buffer_|, apply superres and do the line extension.
280   void SetupLoopRestorationBorder(int row4x4_start, int sb4x4);
281   // Returns true if we can perform border extension in loop (i.e.) without
282   // waiting until the entire frame is decoded. If intra_block_copy is true, we
283   // do in-loop border extension only if the upscaled_width is the same as 4 *
284   // columns4x4. Otherwise, we cannot do in loop border extension since those
285   // pixels may be used by intra block copy.
DoBorderExtensionInLoop()286   bool DoBorderExtensionInLoop() const {
287     return !frame_header_.allow_intrabc ||
288            frame_header_.upscaled_width ==
289                MultiplyBy4(frame_header_.columns4x4);
290   }
291   template <typename Pixel>
CopyPlane(const Pixel * src,ptrdiff_t src_stride,int width,int height,Pixel * dst,ptrdiff_t dst_stride)292   void CopyPlane(const Pixel* src, ptrdiff_t src_stride, int width, int height,
293                  Pixel* dst, ptrdiff_t dst_stride) {
294     assert(height > 0);
295     do {
296       memcpy(dst, src, width * sizeof(Pixel));
297       src += src_stride;
298       dst += dst_stride;
299     } while (--height != 0);
300   }
301 
302   // Worker function used for multi-threaded implementation of Deblocking, CDEF
303   // and Loop Restoration.
304   using WorkerFunction = void (PostFilter::*)(std::atomic<int>* row4x4_atomic);
305   // Schedules |worker| jobs to the |thread_pool_|, runs them in the calling
306   // thread and returns once all the jobs are completed.
307   void RunJobs(WorkerFunction worker);
308 
309   // Functions for the Deblocking filter.
310 
GetIndex(int row4x4)311   static int GetIndex(int row4x4) { return DivideBy4(row4x4); }
GetShift(int row4x4,int column4x4)312   static int GetShift(int row4x4, int column4x4) {
313     return ((row4x4 & 3) << 4) | column4x4;
314   }
GetDeblockUnitId(int row_unit,int column_unit)315   int GetDeblockUnitId(int row_unit, int column_unit) const {
316     return row_unit * num_64x64_blocks_per_row_ + column_unit;
317   }
318   bool GetHorizontalDeblockFilterEdgeInfo(int row4x4, int column4x4,
319                                           uint8_t* level, int* step,
320                                           int* filter_length) const;
321   void GetHorizontalDeblockFilterEdgeInfoUV(int row4x4, int column4x4,
322                                             uint8_t* level_u, uint8_t* level_v,
323                                             int* step,
324                                             int* filter_length) const;
325   bool GetVerticalDeblockFilterEdgeInfo(int row4x4, int column4x4,
326                                         BlockParameters* const* bp_ptr,
327                                         uint8_t* level, int* step,
328                                         int* filter_length) const;
329   void GetVerticalDeblockFilterEdgeInfoUV(int column4x4,
330                                           BlockParameters* const* bp_ptr,
331                                           uint8_t* level_u, uint8_t* level_v,
332                                           int* step, int* filter_length) const;
333   void HorizontalDeblockFilter(int row4x4_start, int column4x4_start);
334   void VerticalDeblockFilter(int row4x4_start, int column4x4_start);
335   // HorizontalDeblockFilter and VerticalDeblockFilter must have the correct
336   // signature.
337   static_assert(std::is_same<decltype(&PostFilter::HorizontalDeblockFilter),
338                              DeblockFilter>::value,
339                 "");
340   static_assert(std::is_same<decltype(&PostFilter::VerticalDeblockFilter),
341                              DeblockFilter>::value,
342                 "");
343   // Applies deblock filtering for the superblock row starting at |row4x4| with
344   // a height of 4*|sb4x4|.
345   void ApplyDeblockFilterForOneSuperBlockRow(int row4x4, int sb4x4);
346   // Worker function used for multi-threaded deblocking.
347   template <LoopFilterType loop_filter_type>
348   void DeblockFilterWorker(std::atomic<int>* row4x4_atomic);
349   static_assert(
350       std::is_same<
351           decltype(&PostFilter::DeblockFilterWorker<kLoopFilterTypeVertical>),
352           WorkerFunction>::value,
353       "");
354   static_assert(
355       std::is_same<
356           decltype(&PostFilter::DeblockFilterWorker<kLoopFilterTypeHorizontal>),
357           WorkerFunction>::value,
358       "");
359 
360   // Functions for the cdef filter.
361 
362   // Copies the deblocked pixels necessary for use by the multi-threaded cdef
363   // implementation into |cdef_border_|.
364   void SetupCdefBorder(int row4x4);
365   // This function prepares the input source block for cdef filtering. The input
366   // source block contains a 12x12 block, with the inner 8x8 as the desired
367   // filter region. It pads the block if the 12x12 block includes out of frame
368   // pixels with a large value. This achieves the required behavior defined in
369   // section 5.11.52 of the spec.
370   template <typename Pixel>
371   void PrepareCdefBlock(int block_width4x4, int block_height4x4, int row4x4,
372                         int column4x4, uint16_t* cdef_source,
373                         ptrdiff_t cdef_stride, bool y_plane,
374                         const uint8_t border_columns[kMaxPlanes][256],
375                         bool use_border_columns);
376   // Applies cdef for one 64x64 block.
377   template <typename Pixel>
378   void ApplyCdefForOneUnit(uint16_t* cdef_block, int index, int block_width4x4,
379                            int block_height4x4, int row4x4_start,
380                            int column4x4_start,
381                            uint8_t border_columns[2][kMaxPlanes][256],
382                            bool use_border_columns[2][2]);
383   // Helper function used by ApplyCdefForOneSuperBlockRow to avoid some code
384   // duplication.
385   void ApplyCdefForOneSuperBlockRowHelper(
386       uint16_t* cdef_block, uint8_t border_columns[2][kMaxPlanes][256],
387       int row4x4, int block_height4x4);
388   // Applies CDEF filtering for the superblock row starting at |row4x4| with a
389   // height of 4*|sb4x4|.
390   void ApplyCdefForOneSuperBlockRow(int row4x4, int sb4x4, bool is_last_row);
391   // Worker function used for multi-threaded CDEF.
392   void ApplyCdefWorker(std::atomic<int>* row4x4_atomic);
393   static_assert(std::is_same<decltype(&PostFilter::ApplyCdefWorker),
394                              WorkerFunction>::value,
395                 "");
396 
397   // Functions for the SuperRes filter.
398 
399   // Applies super resolution for the |src| for |rows[plane]| rows of each
400   // plane. If |line_buffer_row| is larger than or equal to 0, one more row will
401   // be processed, the line buffer indicated by |line_buffer_row| will be used
402   // as the source. If |dst_is_loop_restoration_border| is true, then it means
403   // that the |dst| pointers come from |loop_restoration_border_| and the
404   // strides will be populated from that buffer.
405   void ApplySuperRes(
406       const std::array<uint8_t*, kMaxPlanes>& src,
407       const std::array<int, kMaxPlanes>& rows, int line_buffer_row,
408       const std::array<uint8_t*, kMaxPlanes>& dst,
409       bool dst_is_loop_restoration_border = false);  // Section 7.16.
410   // Applies SuperRes for the superblock row starting at |row4x4| with a height
411   // of 4*|sb4x4|.
412   void ApplySuperResForOneSuperBlockRow(int row4x4, int sb4x4,
413                                         bool is_last_row);
414   void ApplySuperResThreaded();
415 
416   // Functions for the Loop Restoration filter.
417 
418   // Notes about Loop Restoration:
  // (1). Loop restoration processing unit size defaults to 64x64.
420   // Only when the remaining filtering area is smaller than 64x64, the
421   // processing unit size is the actual area size.
422   // For U/V plane, it is (64 >> subsampling_x) x (64 >> subsampling_y).
423   // (2). Loop restoration unit size can be 64x64, 128x128, 256x256 for Y
424   // plane. The unit size for chroma can be the same or half, depending on
425   // subsampling. If either subsampling_x or subsampling_y is one, unit size
426   // is halved on both x and y sides.
427   // All loop restoration units have the same size for one plane.
428   // One loop restoration unit could contain multiple processing units.
429   // But they share the same sets of loop restoration parameters.
430   // (3). Loop restoration has a row offset, kRestorationUnitOffset = 8. The
431   // size of first row of loop restoration units and processing units is
432   // shrunk by the offset.
  // (4). Loop restoration units wrap the bottom and the right of the frame,
  // if the remaining area is small. The criterion is whether the number of
  // remaining rows/columns is smaller than half of the loop restoration unit
  // size.
  // For example, if the frame size is 140x140 and the loop restoration unit
  // size is 128x128, the size of the first loop restoration unit is
  // 128x(128-8) = 128 columns x 120 rows.
  // Since 140 - 120 < 128/2, the remaining 20 rows will be folded into the
  // loop restoration unit. Similarly, the remaining 12 columns will also be
  // folded into the current loop restoration unit. So, even though the frame
  // size is 140x140, there's only one loop restoration unit. Suppose the
  // processing unit is 64x64, then the sizes of the first row of processing
  // units are 64x56, 64x56, 12x56, respectively. The second row is 64x64,
  // 64x64, 12x64. The third row is 64x20, 64x20, 12x20.
447 
448   // |stride| is shared by |src_buffer| and |dst_buffer|.
449   template <typename Pixel>
450   void ApplyLoopRestorationForOneRow(const Pixel* src_buffer, ptrdiff_t stride,
451                                      Plane plane, int plane_height,
452                                      int plane_width, int y, int unit_row,
453                                      int current_process_unit_height,
454                                      int plane_unit_size, Pixel* dst_buffer);
455   // Applies loop restoration for the superblock row starting at |row4x4_start|
456   // with a height of 4*|sb4x4|.
457   template <typename Pixel>
458   void ApplyLoopRestorationForOneSuperBlockRow(int row4x4_start, int sb4x4);
459   // Helper function that calls the right variant of
460   // ApplyLoopRestorationForOneSuperBlockRow based on the bitdepth.
461   void ApplyLoopRestoration(int row4x4_start, int sb4x4);
462   // Worker function used for multithreaded Loop Restoration.
463   void ApplyLoopRestorationWorker(std::atomic<int>* row4x4_atomic);
464   static_assert(std::is_same<decltype(&PostFilter::ApplyLoopRestorationWorker),
465                              WorkerFunction>::value,
466                 "");
467 
468   const ObuFrameHeader& frame_header_;
469   const LoopRestoration& loop_restoration_;
470   const dsp::Dsp& dsp_;
471   const int num_64x64_blocks_per_row_;
472   const int upscaled_width_;
473   const int width_;
474   const int height_;
475   const int8_t bitdepth_;
476   const int8_t subsampling_x_[kMaxPlanes];
477   const int8_t subsampling_y_[kMaxPlanes];
478   const int8_t planes_;
479   const int pixel_size_log2_;
480   const uint8_t* const inner_thresh_;
481   const uint8_t* const outer_thresh_;
482   const bool needs_chroma_deblock_;
483   // This stores the deblocking filter levels assuming that the delta is zero.
484   // This will be used by all superblocks whose delta is zero (without having to
485   // recompute them). The dimensions (in order) are: segment_id, level_index
486   // (based on plane and direction), reference_frame and mode_id.
487   uint8_t deblock_filter_levels_[kMaxSegments][kFrameLfCount]
488                                 [kNumReferenceFrameTypes][2];
489   // Stores the SuperRes info for the frame.
490   struct {
491     int upscaled_width;
492     int initial_subpixel_x;
493     int step;
494   } super_res_info_[kMaxPlanes];
495   const Array2D<int16_t>& cdef_index_;
496   const Array2D<TransformSize>& inter_transform_sizes_;
497   LoopRestorationInfo* const restoration_info_;
498   uint8_t* const superres_coefficients_[kNumPlaneTypes];
499   // Line buffer used by multi-threaded ApplySuperRes().
500   // In the multi-threaded case, this buffer will store the last downscaled row
501   // input of each thread to avoid overwrites by the first upscaled row output
502   // of the thread below it.
503   YuvBuffer& superres_line_buffer_;
504   const BlockParametersHolder& block_parameters_;
505   // Frame buffer to hold cdef filtered frame.
506   YuvBuffer cdef_filtered_buffer_;
507   // Input frame buffer.
508   YuvBuffer& frame_buffer_;
509   // A view into |frame_buffer_| that points to the input and output of the
510   // deblocking process.
511   uint8_t* source_buffer_[kMaxPlanes];
512   // A view into |frame_buffer_| that points to the output of the CDEF filtered
513   // planes (to facilitate in-place CDEF filtering).
514   uint8_t* cdef_buffer_[kMaxPlanes];
515   // A view into |frame_buffer_| that points to the planes after the SuperRes
516   // filter is applied (to facilitate in-place SuperRes).
517   uint8_t* superres_buffer_[kMaxPlanes];
518   // A view into |frame_buffer_| that points to the output of the Loop Restored
519   // planes (to facilitate in-place Loop Restoration).
520   uint8_t* loop_restoration_buffer_[kMaxPlanes];
521   YuvBuffer& cdef_border_;
522   // Buffer used to store the border pixels that are necessary for loop
523   // restoration. This buffer will store 4 rows for every 64x64 block (4 rows
524   // for every 32x32 for chroma with subsampling). The indices of the rows that
525   // are stored are specified in |kLoopRestorationBorderRows|. First 4 rows of
526   // this buffer are never populated and never used.
527   // This buffer is used only when both of the following conditions are true:
528   //   (1). Loop Restoration is on.
529   //   (2). Cdef is on, or multi-threading is enabled for post filter.
530   YuvBuffer& loop_restoration_border_;
531   const uint8_t do_post_filter_mask_;
532   ThreadPool* const thread_pool_;
533 
534   // Tracks the progress of the post filters.
535   int progress_row_ = -1;
536 
537   // A block buffer to hold the input that is converted to uint16_t before
538   // cdef filtering. Only used in single threaded case. Y plane is processed
539   // separately. U and V planes are processed together. So it is sufficient to
540   // have this buffer to accommodate 2 planes at a time.
541   uint16_t cdef_block_[kCdefUnitSizeWithBorders * kCdefUnitSizeWithBorders * 2];
542 
543   template <int bitdepth, typename Pixel>
544   friend class PostFilterSuperResTest;
545 
546   template <int bitdepth, typename Pixel>
547   friend class PostFilterHelperFuncTest;
548 };
549 
550 extern template void PostFilter::ExtendFrame<uint8_t>(uint8_t* frame_start,
551                                                       int width, int height,
552                                                       ptrdiff_t stride,
553                                                       int left, int right,
554                                                       int top, int bottom);
555 
556 #if LIBGAV1_MAX_BITDEPTH >= 10
557 extern template void PostFilter::ExtendFrame<uint16_t>(uint16_t* frame_start,
558                                                        int width, int height,
559                                                        ptrdiff_t stride,
560                                                        int left, int right,
561                                                        int top, int bottom);
562 #endif
563 
564 }  // namespace libgav1
565 
566 #endif  // LIBGAV1_SRC_POST_FILTER_H_
567