1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
18 #define LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
19 
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <mutex>  // NOLINT (unapproved c++11 header)
#include <new>
#include <utility>

#include "src/dsp/constants.h"
#include "src/utils/common.h"
#include "src/utils/compiler_attributes.h"
#include "src/utils/constants.h"
#include "src/utils/memory.h"
#include "src/utils/stack.h"
29 
30 namespace libgav1 {
31 
32 // Buffer to facilitate decoding a superblock.
33 struct TileScratchBuffer : public MaxAlignedAllocable {
34   static constexpr int kBlockDecodedStride = 34;
35 
InitTileScratchBuffer36   LIBGAV1_MUST_USE_RESULT bool Init(int bitdepth) {
37 #if LIBGAV1_MAX_BITDEPTH >= 10
38     const int pixel_size = (bitdepth == 8) ? 1 : 2;
39 #else
40     assert(bitdepth == 8);
41     static_cast<void>(bitdepth);
42     const int pixel_size = 1;
43 #endif
44 
45     constexpr int unaligned_convolve_buffer_stride =
46         kMaxScaledSuperBlockSizeInPixels + kConvolveBorderLeftTop +
47         kConvolveBorderRight;
48     convolve_block_buffer_stride = Align<ptrdiff_t>(
49         unaligned_convolve_buffer_stride * pixel_size, kMaxAlignment);
50     constexpr int convolve_buffer_height = kMaxScaledSuperBlockSizeInPixels +
51                                            kConvolveBorderLeftTop +
52                                            kConvolveBorderBottom;
53 
54     convolve_block_buffer = MakeAlignedUniquePtr<uint8_t>(
55         kMaxAlignment, convolve_buffer_height * convolve_block_buffer_stride);
56     return convolve_block_buffer != nullptr;
57   }
58 
59   // kCompoundPredictionTypeDiffWeighted prediction mode needs a mask of the
60   // prediction block size. This buffer is used to store that mask. The masks
61   // will be created for the Y plane and will be re-used for the U & V planes.
62   alignas(kMaxAlignment) uint8_t weight_mask[kMaxSuperBlockSizeSquareInPixels];
63 
64   // For each instance of the TileScratchBuffer, only one of the following
65   // buffers will be used at any given time, so it is ok to share them in a
66   // union.
67   union {
68     // Buffers used for prediction process.
69     // Compound prediction calculations always output 16-bit values. Depending
70     // on the bitdepth the values may be treated as int16_t or uint16_t. See
71     // src/dsp/convolve.cc and src/dsp/warp.cc for explanations.
72     // Inter/intra calculations output Pixel values.
73     // These buffers always use width as the stride. This enables packing the
74     // values in and simplifies loads/stores for small values.
75 
76     // 10/12 bit compound prediction and 10/12 bit inter/intra prediction.
77     alignas(kMaxAlignment) uint16_t
78         prediction_buffer[2][kMaxSuperBlockSizeSquareInPixels];
79     // 8 bit compound prediction buffer.
80     alignas(kMaxAlignment) int16_t
81         compound_prediction_buffer_8bpp[2][kMaxSuperBlockSizeSquareInPixels];
82 
83     // Union usage note: This is used only by functions in the "intra"
84     // prediction path.
85     //
86     // Buffer used for storing subsampled luma samples needed for CFL
87     // prediction. This buffer is used to avoid repetition of the subsampling
88     // for the V plane when it is already done for the U plane.
89     int16_t cfl_luma_buffer[kCflLumaBufferStride][kCflLumaBufferStride];
90   };
91 
92   // Buffer used for convolve. The maximum size required for this buffer is:
93   //  maximum block height (with scaling and border) = 2 * 128 + 3 + 4 = 263.
94   //  maximum block stride (with scaling and border aligned to 16) =
95   //     (2 * 128 + 3 + 8 + 5) * pixel_size = 272 * pixel_size.
96   //  Where pixel_size is (bitdepth == 8) ? 1 : 2.
97   // Has an alignment of kMaxAlignment when allocated.
98   AlignedUniquePtr<uint8_t> convolve_block_buffer;
99   ptrdiff_t convolve_block_buffer_stride;
100 
101   // Flag indicating whether the data in |cfl_luma_buffer| is valid.
102   bool cfl_luma_buffer_valid;
103 
104   // Equivalent to BlockDecoded array in the spec. This stores the decoded
105   // state of every 4x4 block in a superblock. It has 1 row/column border on
106   // all 4 sides (hence the 34x34 dimension instead of 32x32). Note that the
107   // spec uses "-1" as an index to access the left and top borders. In the
108   // code, we treat the index (1, 1) as equivalent to the spec's (0, 0). So
109   // all accesses into this array will be offset by +1 when compared with the
110   // spec.
111   bool block_decoded[kMaxPlanes][kBlockDecodedStride][kBlockDecodedStride];
112 };
113 
114 class TileScratchBufferPool {
115  public:
Reset(int bitdepth)116   void Reset(int bitdepth) {
117     if (bitdepth_ == bitdepth) return;
118 #if LIBGAV1_MAX_BITDEPTH >= 10
119     if (bitdepth_ == 8 && bitdepth != 8) {
120       // We are going from a pixel size of 1 to a pixel size of 2. So invalidate
121       // the stack.
122       std::lock_guard<std::mutex> lock(mutex_);
123       while (!buffers_.Empty()) {
124         buffers_.Pop();
125       }
126     }
127 #endif
128     bitdepth_ = bitdepth;
129   }
130 
Get()131   std::unique_ptr<TileScratchBuffer> Get() {
132     std::lock_guard<std::mutex> lock(mutex_);
133     if (buffers_.Empty()) {
134       std::unique_ptr<TileScratchBuffer> scratch_buffer(new (std::nothrow)
135                                                             TileScratchBuffer);
136       if (scratch_buffer == nullptr || !scratch_buffer->Init(bitdepth_)) {
137         return nullptr;
138       }
139       return scratch_buffer;
140     }
141     return buffers_.Pop();
142   }
143 
Release(std::unique_ptr<TileScratchBuffer> scratch_buffer)144   void Release(std::unique_ptr<TileScratchBuffer> scratch_buffer) {
145     std::lock_guard<std::mutex> lock(mutex_);
146     buffers_.Push(std::move(scratch_buffer));
147   }
148 
149  private:
150   std::mutex mutex_;
151   // We will never need more than kMaxThreads scratch buffers since that is the
152   // maximum amount of work that will be done at any given time.
153   Stack<std::unique_ptr<TileScratchBuffer>, kMaxThreads> buffers_
154       LIBGAV1_GUARDED_BY(mutex_);
155   int bitdepth_ = 0;
156 };
157 
158 }  // namespace libgav1
159 
160 #endif  // LIBGAV1_SRC_TILE_SCRATCH_BUFFER_H_
161