1 // Copyright 2019 The libgav1 Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include <algorithm>
16 #include <array>
17 #include <cassert>
18 #include <cstddef>
19 #include <cstdint>
20 #include <cstdlib>
21 #include <cstring>
22 #include <memory>
23 
24 #include "src/buffer_pool.h"
25 #include "src/dsp/constants.h"
26 #include "src/dsp/dsp.h"
27 #include "src/motion_vector.h"
28 #include "src/obu_parser.h"
29 #include "src/prediction_mask.h"
30 #include "src/tile.h"
31 #include "src/utils/array_2d.h"
32 #include "src/utils/bit_mask_set.h"
33 #include "src/utils/block_parameters_holder.h"
34 #include "src/utils/common.h"
35 #include "src/utils/constants.h"
36 #include "src/utils/logging.h"
37 #include "src/utils/memory.h"
38 #include "src/utils/types.h"
39 #include "src/warp_prediction.h"
40 #include "src/yuv_buffer.h"
41 
42 namespace libgav1 {
43 namespace {
44 
45 // Import all the constants in the anonymous namespace.
46 #include "src/inter_intra_masks.inc"
47 
// Precision bits when scaling reference frames.
constexpr int kReferenceScaleShift = 14;
// Multiplier applied to a block's angle_delta to obtain the adjustment, in
// degrees, that is added to the base angle of a directional prediction mode.
constexpr int kAngleStep = 3;
// Base prediction angle, in degrees, indexed by the prediction mode. It is only
// read for directional modes; the 0 entries presumably belong to the
// non-directional modes (the enum order is defined elsewhere).
constexpr int kPredictionModeToAngle[kIntraPredictionModesUV] = {
    0, 90, 180, 45, 135, 113, 157, 203, 67, 0, 0, 0, 0};
53 
// The following modes always need both the left_column and top_row for intra
// prediction, independently of whether the neighbors are available. For
// directional modes the left/top requirement is inferred from the prediction
// angle. For Dc modes, the left/top requirement is inferred from whether or
// not left/top is available.
constexpr BitMaskSet kNeedsLeftAndTop(kPredictionModeSmooth,
                                      kPredictionModeSmoothHorizontal,
                                      kPredictionModeSmoothVertical,
                                      kPredictionModePaeth);
62 
GetDirectionalIntraPredictorDerivative(const int angle)63 int16_t GetDirectionalIntraPredictorDerivative(const int angle) {
64   assert(angle >= 3);
65   assert(angle <= 87);
66   return kDirectionalIntraPredictorDerivative[DivideBy2(angle) - 1];
67 }
68 
69 // Maps the block_size to an index as follows:
70 //  kBlock8x8 => 0.
71 //  kBlock8x16 => 1.
72 //  kBlock8x32 => 2.
73 //  kBlock16x8 => 3.
74 //  kBlock16x16 => 4.
75 //  kBlock16x32 => 5.
76 //  kBlock32x8 => 6.
77 //  kBlock32x16 => 7.
78 //  kBlock32x32 => 8.
GetWedgeBlockSizeIndex(BlockSize block_size)79 int GetWedgeBlockSizeIndex(BlockSize block_size) {
80   assert(block_size >= kBlock8x8);
81   return block_size - kBlock8x8 - static_cast<int>(block_size >= kBlock16x8) -
82          static_cast<int>(block_size >= kBlock32x8);
83 }
84 
85 // Maps a dimension of 4, 8, 16 and 32 to indices 0, 1, 2 and 3 respectively.
GetInterIntraMaskLookupIndex(int dimension)86 int GetInterIntraMaskLookupIndex(int dimension) {
87   assert(dimension == 4 || dimension == 8 || dimension == 16 ||
88          dimension == 32);
89   return FloorLog2(dimension) - 2;
90 }
91 
// 7.11.2.9.
// Returns the intra edge filter strength (0 to 3, where 0 means no filtering)
// for a block of |width| x |height|. |filter_type| selects one of two
// threshold sets (0, or any other value). |delta| is the difference between
// the prediction angle and the edge direction; only its magnitude matters.
int GetIntraEdgeFilterStrength(int width, int height, int filter_type,
                               int delta) {
  const int block_wh = width + height;
  const int abs_delta = std::abs(delta);
  if (filter_type == 0) {
    if (block_wh <= 8) return (abs_delta >= 56) ? 1 : 0;
    if (block_wh <= 16) return (abs_delta >= 40) ? 1 : 0;
    if (block_wh <= 24) {
      if (abs_delta >= 32) return 3;
      if (abs_delta >= 16) return 2;
      return (abs_delta >= 8) ? 1 : 0;
    }
    if (block_wh <= 32) {
      if (abs_delta >= 32) return 3;
      return (abs_delta >= 4) ? 2 : 1;
    }
    return 3;
  }
  // Any non-zero |filter_type| uses the second threshold set.
  if (block_wh <= 8) {
    if (abs_delta >= 64) return 2;
    return (abs_delta >= 40) ? 1 : 0;
  }
  if (block_wh <= 16) {
    if (abs_delta >= 48) return 2;
    return (abs_delta >= 20) ? 1 : 0;
  }
  if (block_wh <= 24) return (abs_delta >= 4) ? 3 : 0;
  return 3;
}
128 
// 7.11.2.10.
// Returns true if the intra edge should be upsampled for a block of
// |width| x |height|. |delta| is the difference between the prediction angle
// and the edge direction; only its magnitude matters and it must not be 0.
bool DoIntraEdgeUpsampling(int width, int height, int filter_type, int delta) {
  // This function should not be called when the prediction angle is 90 or 180.
  assert(delta != 0);
  if (std::abs(delta) >= 40) return false;
  // The size limit is tighter when |filter_type| is 1.
  const int max_block_wh = (filter_type == 1) ? 8 : 16;
  return width + height <= max_block_wh;
}
138 
// Multipliers used by GetDistanceWeights() when comparing the two frame
// distances. Only rows 0..2 are read there; the column is selected by the
// relative order of the two distances.
constexpr uint8_t kQuantizedDistanceWeight[4][2] = {
    {2, 3}, {2, 5}, {2, 7}, {1, kMaxFrameDistance}};

// Weight pairs returned by GetDistanceWeights(). Row 3 is used when either
// distance is 0 or when none of rows 0..2 matches. Every row sums to 16.
constexpr uint8_t kQuantizedDistanceLookup[4][2] = {
    {9, 7}, {11, 5}, {12, 4}, {13, 3}};
144 
GetDistanceWeights(const int distance[2],int weight[2])145 void GetDistanceWeights(const int distance[2], int weight[2]) {
146   // Note: distance[0] and distance[1] correspond to relative distance
147   // between current frame and reference frame [1] and [0], respectively.
148   const int order = static_cast<int>(distance[0] <= distance[1]);
149   if (distance[0] == 0 || distance[1] == 0) {
150     weight[0] = kQuantizedDistanceLookup[3][order];
151     weight[1] = kQuantizedDistanceLookup[3][1 - order];
152   } else {
153     int i;
154     for (i = 0; i < 3; ++i) {
155       const int weight_0 = kQuantizedDistanceWeight[i][order];
156       const int weight_1 = kQuantizedDistanceWeight[i][1 - order];
157       if (order == 0) {
158         if (distance[0] * weight_0 < distance[1] * weight_1) break;
159       } else {
160         if (distance[0] * weight_0 > distance[1] * weight_1) break;
161       }
162     }
163     weight[0] = kQuantizedDistanceLookup[i][order];
164     weight[1] = kQuantizedDistanceLookup[i][1 - order];
165   }
166 }
167 
GetIntraPredictor(PredictionMode mode,bool has_left,bool has_top)168 dsp::IntraPredictor GetIntraPredictor(PredictionMode mode, bool has_left,
169                                       bool has_top) {
170   if (mode == kPredictionModeDc) {
171     if (has_left && has_top) {
172       return dsp::kIntraPredictorDc;
173     }
174     if (has_left) {
175       return dsp::kIntraPredictorDcLeft;
176     }
177     if (has_top) {
178       return dsp::kIntraPredictorDcTop;
179     }
180     return dsp::kIntraPredictorDcFill;
181   }
182   switch (mode) {
183     case kPredictionModePaeth:
184       return dsp::kIntraPredictorPaeth;
185     case kPredictionModeSmooth:
186       return dsp::kIntraPredictorSmooth;
187     case kPredictionModeSmoothVertical:
188       return dsp::kIntraPredictorSmoothVertical;
189     case kPredictionModeSmoothHorizontal:
190       return dsp::kIntraPredictorSmoothHorizontal;
191     default:
192       return dsp::kNumIntraPredictors;
193   }
194 }
195 
GetStartPoint(Array2DView<uint8_t> * const buffer,const int plane,const int x,const int y,const int bitdepth)196 uint8_t* GetStartPoint(Array2DView<uint8_t>* const buffer, const int plane,
197                        const int x, const int y, const int bitdepth) {
198 #if LIBGAV1_MAX_BITDEPTH >= 10
199   if (bitdepth > 8) {
200     Array2DView<uint16_t> buffer16(
201         buffer[plane].rows(), buffer[plane].columns() / sizeof(uint16_t),
202         reinterpret_cast<uint16_t*>(&buffer[plane][0][0]));
203     return reinterpret_cast<uint8_t*>(&buffer16[y][x]);
204   }
205 #endif  // LIBGAV1_MAX_BITDEPTH >= 10
206   static_cast<void>(bitdepth);
207   return &buffer[plane][y][x];
208 }
209 
GetPixelPositionFromHighScale(int start,int step,int offset)210 int GetPixelPositionFromHighScale(int start, int step, int offset) {
211   return (start + step * offset) >> kScaleSubPixelBits;
212 }
213 
// Picks the mask blend function from |dsp|. Inter-intra prediction without a
// wedge mask always uses the entry at subsampling index 0; every other case is
// indexed by the sum of the two subsampling factors.
dsp::MaskBlendFunc GetMaskBlendFunc(const dsp::Dsp& dsp, bool is_inter_intra,
                                    bool is_wedge_inter_intra,
                                    int subsampling_x, int subsampling_y) {
  if (is_inter_intra && !is_wedge_inter_intra) {
    return dsp.mask_blend[0][/*is_inter_intra=*/true];
  }
  const int subsampling_index = subsampling_x + subsampling_y;
  return dsp.mask_blend[subsampling_index][is_inter_intra];
}
221 
222 }  // namespace
223 
// Performs intra prediction for the transform block of size |tx_size| whose
// top-left pixel is at (|x|, |y|) of |plane| (plane dimensions), writing the
// prediction into |buffer_[plane]|.
//
// The function first builds the top row and left column edge arrays from
// already reconstructed pixels, substituting fixed fill values or replicated
// pixels where a neighbor is unavailable or lies outside the frame. It then
// dispatches to the filter intra predictor, the directional predictor or one
// of the non-directional predictors.
//
// |has_left|, |has_top|, |has_top_right| and |has_bottom_left| state which
// neighbors are available. |Pixel| is the sample type of the frame buffer.
template <typename Pixel>
void Tile::IntraPrediction(const Block& block, Plane plane, int x, int y,
                           bool has_left, bool has_top, bool has_top_right,
                           bool has_bottom_left, PredictionMode mode,
                           TransformSize tx_size) {
  const int width = 1 << kTransformWidthLog2[tx_size];
  const int height = 1 << kTransformHeightLog2[tx_size];
  const int x_shift = subsampling_x_[plane];
  const int y_shift = subsampling_y_[plane];
  // Last valid column/row of the frame in this plane.
  const int max_x = (MultiplyBy4(frame_header_.columns4x4) >> x_shift) - 1;
  const int max_y = (MultiplyBy4(frame_header_.rows4x4) >> y_shift) - 1;
  // For performance reasons, do not initialize the following two buffers.
  alignas(kMaxAlignment) Pixel top_row_data[160];
  alignas(kMaxAlignment) Pixel left_column_data[160];
#if LIBGAV1_MSAN
  // Under MSAN the buffers are zeroed for directional modes only.
  if (IsDirectionalMode(mode)) {
    memset(top_row_data, 0, sizeof(top_row_data));
    memset(left_column_data, 0, sizeof(left_column_data));
  }
#endif
  // Some predictors use |top_row_data| and |left_column_data| with a negative
  // offset to access pixels to the top-left of the current block. So have some
  // space before the arrays to allow populating those without having to move
  // the rest of the array.
  Pixel* const top_row = top_row_data + 16;
  Pixel* const left_column = left_column_data + 16;
  const int bitdepth = sequence_header_.color_config.bitdepth;
  const int top_and_left_size = width + height;
  const bool is_directional_mode = IsDirectionalMode(mode);
  const PredictionParameters& prediction_parameters =
      *block.bp->prediction_parameters;
  // Filter intra is only considered for the luma plane.
  const bool use_filter_intra =
      (plane == kPlaneY && prediction_parameters.use_filter_intra);
  // Base angle of the mode plus the signalled delta in steps of kAngleStep.
  // 0 for non-directional modes.
  const int prediction_angle =
      is_directional_mode
          ? kPredictionModeToAngle[mode] +
                prediction_parameters.angle_delta[GetPlaneType(plane)] *
                    kAngleStep
          : 0;
  // Directional prediction requires buffers larger than the width or height.
  const int top_size = is_directional_mode ? top_and_left_size : width;
  const int left_size = is_directional_mode ? top_and_left_size : height;
  // Number of pixels that may be copied from the frame buffer. The remainder
  // of |top_size| / |left_size| is filled by replicating the last pixel.
  const int top_right_size =
      is_directional_mode ? (has_top_right ? 2 : 1) * width : width;
  const int bottom_left_size =
      is_directional_mode ? (has_bottom_left ? 2 : 1) * height : height;

  // View of the plane in units of |Pixel|.
  Array2DView<Pixel> buffer(buffer_[plane].rows(),
                            buffer_[plane].columns() / sizeof(Pixel),
                            reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
  const bool needs_top = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
                         (is_directional_mode && prediction_angle < 180) ||
                         (mode == kPredictionModeDc && has_top);
  const bool needs_left = use_filter_intra || kNeedsLeftAndTop.Contains(mode) ||
                          (is_directional_mode && prediction_angle > 90) ||
                          (mode == kPredictionModeDc && has_left);

  // Row above the block in the frame buffer. It may be replaced below by the
  // row stored in |intra_prediction_buffer_|.
  const Pixel* top_row_src = buffer[y - 1];

  // Determine if we need to retrieve the top row from
  // |intra_prediction_buffer_|.
  if ((needs_top || needs_left) && use_intra_prediction_buffer_) {
    // Superblock index of block.row4x4. block.row4x4 is always in luma
    // dimension (no subsampling).
    const int current_superblock_index =
        block.row4x4 >> (sequence_header_.use_128x128_superblock ? 5 : 4);
    // Superblock index of y - 1. y is in the plane dimension (chroma planes
    // could be subsampled).
    const int plane_shift = (sequence_header_.use_128x128_superblock ? 7 : 6) -
                            subsampling_y_[plane];
    const int top_row_superblock_index = (y - 1) >> plane_shift;
    // If the superblock index of y - 1 is not that of the current superblock,
    // then we will have to retrieve the top row from the
    // |intra_prediction_buffer_|.
    if (current_superblock_index != top_row_superblock_index) {
      top_row_src = reinterpret_cast<const Pixel*>(
          (*intra_prediction_buffer_)[plane].get());
    }
  }

  if (needs_top) {
    // Compute top_row.
    // top_row[-1] is the top-left corner pixel. It comes from the row above
    // when that is available, otherwise from the current row, and is half of
    // the pixel range when neither neighbor exists.
    if (has_top || has_left) {
      const int left_index = has_left ? x - 1 : x;
      top_row[-1] = has_top ? top_row_src[left_index] : buffer[y][left_index];
    } else {
      top_row[-1] = 1 << (bitdepth - 1);
    }
    if (!has_top && has_left) {
      // No row above: replicate the pixel to the left of the block.
      Memset(top_row, buffer[y][x - 1], top_size);
    } else if (!has_top && !has_left) {
      // No neighbors at all: fill with half of the pixel range minus one.
      Memset(top_row, (1 << (bitdepth - 1)) - 1, top_size);
    } else {
      // Copy the pixels that exist inside the frame.
      const int top_limit = std::min(max_x - x + 1, top_right_size);
      memcpy(top_row, &top_row_src[x], top_limit * sizeof(Pixel));
      // Even though it is safe to call Memset with a size of 0, accessing
      // top_row_src[top_limit + x - 1] is not allowed when this condition is
      // false.
      if (top_size - top_limit > 0) {
        // Replicate the last copied pixel over the rest of the row.
        Memset(top_row + top_limit, top_row_src[top_limit + x - 1],
               top_size - top_limit);
      }
    }
  }
  if (needs_left) {
    // Compute left_column.
    // left_column[-1] is the same top-left corner pixel as top_row[-1].
    if (has_top || has_left) {
      const int left_index = has_left ? x - 1 : x;
      left_column[-1] =
          has_top ? top_row_src[left_index] : buffer[y][left_index];
    } else {
      left_column[-1] = 1 << (bitdepth - 1);
    }
    if (!has_left && has_top) {
      // No column to the left: replicate the pixel above the block.
      Memset(left_column, top_row_src[x], left_size);
    } else if (!has_left && !has_top) {
      // No neighbors at all: fill with half of the pixel range plus one.
      Memset(left_column, (1 << (bitdepth - 1)) + 1, left_size);
    } else {
      // Copy the pixels that exist inside the frame.
      const int left_limit = std::min(max_y - y + 1, bottom_left_size);
      for (int i = 0; i < left_limit; ++i) {
        left_column[i] = buffer[y + i][x - 1];
      }
      // Even though it is safe to call Memset with a size of 0, accessing
      // buffer[left_limit + y - 1][x - 1] is not allowed when this condition is
      // false.
      if (left_size - left_limit > 0) {
        // Replicate the last copied pixel over the rest of the column.
        Memset(left_column + left_limit, buffer[left_limit + y - 1][x - 1],
               left_size - left_limit);
      }
    }
  }
  Pixel* const dest = &buffer[y][x];
  // The stride is the column count of the uint8_t view of the plane
  // (presumably a stride in bytes).
  const ptrdiff_t dest_stride = buffer_[plane].columns();
  if (use_filter_intra) {
    dsp_.filter_intra_predictor(dest, dest_stride, top_row, left_column,
                                prediction_parameters.filter_intra_mode, width,
                                height);
  } else if (is_directional_mode) {
    DirectionalPrediction(block, plane, x, y, has_left, has_top, needs_left,
                          needs_top, prediction_angle, width, height, max_x,
                          max_y, tx_size, top_row, left_column);
  } else {
    const dsp::IntraPredictor predictor =
        GetIntraPredictor(mode, has_left, has_top);
    assert(predictor != dsp::kNumIntraPredictors);
    dsp_.intra_predictors[tx_size][predictor](dest, dest_stride, top_row,
                                              left_column);
  }
}

// Explicit instantiations for the supported pixel types. The uint16_t version
// is only built when high bitdepth support is compiled in.
template void Tile::IntraPrediction<uint8_t>(const Block& block, Plane plane,
                                             int x, int y, bool has_left,
                                             bool has_top, bool has_top_right,
                                             bool has_bottom_left,
                                             PredictionMode mode,
                                             TransformSize tx_size);
#if LIBGAV1_MAX_BITDEPTH >= 10
template void Tile::IntraPrediction<uint16_t>(const Block& block, Plane plane,
                                              int x, int y, bool has_left,
                                              bool has_top, bool has_top_right,
                                              bool has_bottom_left,
                                              PredictionMode mode,
                                              TransformSize tx_size);
#endif
388 
// The set of smooth prediction modes, used by Tile::IsSmoothPrediction().
constexpr BitMaskSet kPredictionModeSmoothMask(kPredictionModeSmooth,
                                               kPredictionModeSmoothHorizontal,
                                               kPredictionModeSmoothVertical);
392 
IsSmoothPrediction(int row,int column,Plane plane) const393 bool Tile::IsSmoothPrediction(int row, int column, Plane plane) const {
394   const BlockParameters& bp = *block_parameters_holder_.Find(row, column);
395   PredictionMode mode;
396   if (plane == kPlaneY) {
397     mode = bp.y_mode;
398   } else {
399     if (bp.reference_frame[0] > kReferenceFrameIntra) return false;
400     mode = bp.uv_mode;
401   }
402   return kPredictionModeSmoothMask.Contains(mode);
403 }
404 
GetIntraEdgeFilterType(const Block & block,Plane plane) const405 int Tile::GetIntraEdgeFilterType(const Block& block, Plane plane) const {
406   const int subsampling_x = subsampling_x_[plane];
407   const int subsampling_y = subsampling_y_[plane];
408   if (block.top_available[plane]) {
409     const int row = block.row4x4 - 1 - (block.row4x4 & subsampling_y);
410     const int column = block.column4x4 + (~block.column4x4 & subsampling_x);
411     if (IsSmoothPrediction(row, column, plane)) return 1;
412   }
413   if (block.left_available[plane]) {
414     const int row = block.row4x4 + (~block.row4x4 & subsampling_y);
415     const int column = block.column4x4 - 1 - (block.column4x4 & subsampling_x);
416     if (IsSmoothPrediction(row, column, plane)) return 1;
417   }
418   return 0;
419 }
420 
// Performs directional intra prediction at |prediction_angle| for the
// |width| x |height| block at (|x|, |y|) of |plane|, writing into
// |buffer_[plane]|.
//
// |top_row| and |left_column| are the edge arrays prepared by the caller, with
// index -1 holding the top-left corner pixel. They may be modified in place by
// the corner filter, the edge filter and the edge upsampler. |max_x| and
// |max_y| bound the number of edge pixels that get filtered.
//
// Angles of exactly 90 and 180 use the vertical and horizontal predictors and
// skip all edge processing.
template <typename Pixel>
void Tile::DirectionalPrediction(const Block& block, Plane plane, int x, int y,
                                 bool has_left, bool has_top, bool needs_left,
                                 bool needs_top, int prediction_angle,
                                 int width, int height, int max_x, int max_y,
                                 TransformSize tx_size, Pixel* const top_row,
                                 Pixel* const left_column) {
  // View of the plane in units of |Pixel|.
  Array2DView<Pixel> buffer(buffer_[plane].rows(),
                            buffer_[plane].columns() / sizeof(Pixel),
                            reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
  Pixel* const dest = &buffer[y][x];
  const ptrdiff_t stride = buffer_[plane].columns();
  if (prediction_angle == 90) {
    dsp_.intra_predictors[tx_size][dsp::kIntraPredictorVertical](
        dest, stride, top_row, left_column);
    return;
  }
  if (prediction_angle == 180) {
    dsp_.intra_predictors[tx_size][dsp::kIntraPredictorHorizontal](
        dest, stride, top_row, left_column);
    return;
  }

  bool upsampled_top = false;
  bool upsampled_left = false;
  if (sequence_header_.enable_intra_edge_filter) {
    const int filter_type = GetIntraEdgeFilterType(block, plane);
    if (prediction_angle > 90 && prediction_angle < 180 &&
        (width + height) >= 24) {
      // 7.11.2.7.
      // Smooth the corner pixel with its two neighbors using weights 5/6/5 and
      // store the result in both edge arrays.
      left_column[-1] = top_row[-1] = RightShiftWithRounding(
          left_column[0] * 5 + top_row[-1] * 6 + top_row[0] * 5, 4);
    }
    if (has_top && needs_top) {
      const int strength = GetIntraEdgeFilterStrength(
          width, height, filter_type, prediction_angle - 90);
      if (strength > 0) {
        // The filter starts at the corner pixel, hence the + 1. For angles
        // below 90 the top edge extends by |height| pixels.
        const int num_pixels = std::min(width, max_x - x + 1) +
                               ((prediction_angle < 90) ? height : 0) + 1;
        dsp_.intra_edge_filter(top_row - 1, num_pixels, strength);
      }
    }
    if (has_left && needs_left) {
      const int strength = GetIntraEdgeFilterStrength(
          width, height, filter_type, prediction_angle - 180);
      if (strength > 0) {
        // The filter starts at the corner pixel, hence the + 1. For angles
        // above 180 the left edge extends by |width| pixels.
        const int num_pixels = std::min(height, max_y - y + 1) +
                               ((prediction_angle > 180) ? width : 0) + 1;
        dsp_.intra_edge_filter(left_column - 1, num_pixels, strength);
      }
    }
    // Upsampling is decided independently for each edge and is applied after
    // the edge filter.
    upsampled_top = DoIntraEdgeUpsampling(width, height, filter_type,
                                          prediction_angle - 90);
    if (upsampled_top && needs_top) {
      const int num_pixels = width + ((prediction_angle < 90) ? height : 0);
      dsp_.intra_edge_upsampler(top_row, num_pixels);
    }
    upsampled_left = DoIntraEdgeUpsampling(width, height, filter_type,
                                           prediction_angle - 180);
    if (upsampled_left && needs_left) {
      const int num_pixels = height + ((prediction_angle > 180) ? width : 0);
      dsp_.intra_edge_upsampler(left_column, num_pixels);
    }
  }

  if (prediction_angle < 90) {
    // Zone 1: predicts from the top row only.
    const int dx = GetDirectionalIntraPredictorDerivative(prediction_angle);
    dsp_.directional_intra_predictor_zone1(dest, stride, top_row, width, height,
                                           dx, upsampled_top);
  } else if (prediction_angle < 180) {
    // Zone 2: predicts from both the top row and the left column.
    const int dx =
        GetDirectionalIntraPredictorDerivative(180 - prediction_angle);
    const int dy =
        GetDirectionalIntraPredictorDerivative(prediction_angle - 90);
    dsp_.directional_intra_predictor_zone2(dest, stride, top_row, left_column,
                                           width, height, dx, dy, upsampled_top,
                                           upsampled_left);
  } else {
    // Zone 3: predicts from the left column only.
    assert(prediction_angle < 270);
    const int dy =
        GetDirectionalIntraPredictorDerivative(270 - prediction_angle);
    dsp_.directional_intra_predictor_zone3(dest, stride, left_column, width,
                                           height, dy, upsampled_left);
  }
}
506 
507 template <typename Pixel>
PalettePrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const int x,const int y,const TransformSize tx_size)508 void Tile::PalettePrediction(const Block& block, const Plane plane,
509                              const int start_x, const int start_y, const int x,
510                              const int y, const TransformSize tx_size) {
511   const int tx_width = kTransformWidth[tx_size];
512   const int tx_height = kTransformHeight[tx_size];
513   const uint16_t* const palette = block.bp->palette_mode_info.color[plane];
514   const PlaneType plane_type = GetPlaneType(plane);
515   const int x4 = MultiplyBy4(x);
516   const int y4 = MultiplyBy4(y);
517   Array2DView<Pixel> buffer(buffer_[plane].rows(),
518                             buffer_[plane].columns() / sizeof(Pixel),
519                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
520   for (int row = 0; row < tx_height; ++row) {
521     assert(block.bp->prediction_parameters
522                ->color_index_map[plane_type][y4 + row] != nullptr);
523     for (int column = 0; column < tx_width; ++column) {
524       buffer[start_y + row][start_x + column] =
525           palette[block.bp->prediction_parameters
526                       ->color_index_map[plane_type][y4 + row][x4 + column]];
527     }
528   }
529 }
530 
531 template void Tile::PalettePrediction<uint8_t>(
532     const Block& block, const Plane plane, const int start_x, const int start_y,
533     const int x, const int y, const TransformSize tx_size);
534 #if LIBGAV1_MAX_BITDEPTH >= 10
535 template void Tile::PalettePrediction<uint16_t>(
536     const Block& block, const Plane plane, const int start_x, const int start_y,
537     const int x, const int y, const TransformSize tx_size);
538 #endif
539 
540 template <typename Pixel>
ChromaFromLumaPrediction(const Block & block,const Plane plane,const int start_x,const int start_y,const TransformSize tx_size)541 void Tile::ChromaFromLumaPrediction(const Block& block, const Plane plane,
542                                     const int start_x, const int start_y,
543                                     const TransformSize tx_size) {
544   const int subsampling_x = subsampling_x_[plane];
545   const int subsampling_y = subsampling_y_[plane];
546   const PredictionParameters& prediction_parameters =
547       *block.bp->prediction_parameters;
548   Array2DView<Pixel> y_buffer(
549       buffer_[kPlaneY].rows(), buffer_[kPlaneY].columns() / sizeof(Pixel),
550       reinterpret_cast<Pixel*>(&buffer_[kPlaneY][0][0]));
551   if (!block.scratch_buffer->cfl_luma_buffer_valid) {
552     const int luma_x = start_x << subsampling_x;
553     const int luma_y = start_y << subsampling_y;
554     dsp_.cfl_subsamplers[tx_size][subsampling_x + subsampling_y](
555         block.scratch_buffer->cfl_luma_buffer,
556         prediction_parameters.max_luma_width - luma_x,
557         prediction_parameters.max_luma_height - luma_y,
558         reinterpret_cast<uint8_t*>(&y_buffer[luma_y][luma_x]),
559         buffer_[kPlaneY].columns());
560     block.scratch_buffer->cfl_luma_buffer_valid = true;
561   }
562   Array2DView<Pixel> buffer(buffer_[plane].rows(),
563                             buffer_[plane].columns() / sizeof(Pixel),
564                             reinterpret_cast<Pixel*>(&buffer_[plane][0][0]));
565   dsp_.cfl_intra_predictors[tx_size](
566       reinterpret_cast<uint8_t*>(&buffer[start_y][start_x]),
567       buffer_[plane].columns(), block.scratch_buffer->cfl_luma_buffer,
568       (plane == kPlaneU) ? prediction_parameters.cfl_alpha_u
569                          : prediction_parameters.cfl_alpha_v);
570 }
571 
572 template void Tile::ChromaFromLumaPrediction<uint8_t>(
573     const Block& block, const Plane plane, const int start_x, const int start_y,
574     const TransformSize tx_size);
575 #if LIBGAV1_MAX_BITDEPTH >= 10
576 template void Tile::ChromaFromLumaPrediction<uint16_t>(
577     const Block& block, const Plane plane, const int start_x, const int start_y,
578     const TransformSize tx_size);
579 #endif
580 
InterIntraPrediction(uint16_t * const prediction_0,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const PredictionParameters & prediction_parameters,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,uint8_t * const dest,const ptrdiff_t dest_stride)581 void Tile::InterIntraPrediction(
582     uint16_t* const prediction_0, const uint8_t* const prediction_mask,
583     const ptrdiff_t prediction_mask_stride,
584     const PredictionParameters& prediction_parameters,
585     const int prediction_width, const int prediction_height,
586     const int subsampling_x, const int subsampling_y, uint8_t* const dest,
587     const ptrdiff_t dest_stride) {
588   assert(prediction_mask != nullptr);
589   assert(prediction_parameters.compound_prediction_type ==
590              kCompoundPredictionTypeIntra ||
591          prediction_parameters.compound_prediction_type ==
592              kCompoundPredictionTypeWedge);
593   // The first buffer of InterIntra is from inter prediction.
594   // The second buffer is from intra prediction.
595 #if LIBGAV1_MAX_BITDEPTH >= 10
596   if (sequence_header_.color_config.bitdepth > 8) {
597     GetMaskBlendFunc(dsp_, /*is_inter_intra=*/true,
598                      prediction_parameters.is_wedge_inter_intra, subsampling_x,
599                      subsampling_y)(
600         prediction_0, reinterpret_cast<uint16_t*>(dest),
601         dest_stride / sizeof(uint16_t), prediction_mask, prediction_mask_stride,
602         prediction_width, prediction_height, dest, dest_stride);
603     return;
604   }
605 #endif
606   const int function_index = prediction_parameters.is_wedge_inter_intra
607                                  ? subsampling_x + subsampling_y
608                                  : 0;
609   // |is_inter_intra| prediction values are stored in a Pixel buffer but it is
610   // currently declared as a uint16_t buffer.
611   // TODO(johannkoenig): convert the prediction buffer to a uint8_t buffer and
612   // remove the reinterpret_cast.
613   dsp_.inter_intra_mask_blend_8bpp[function_index](
614       reinterpret_cast<uint8_t*>(prediction_0), dest, dest_stride,
615       prediction_mask, prediction_mask_stride, prediction_width,
616       prediction_height);
617 }
618 
CompoundInterPrediction(const Block & block,const uint8_t * const prediction_mask,const ptrdiff_t prediction_mask_stride,const int prediction_width,const int prediction_height,const int subsampling_x,const int subsampling_y,const int candidate_row,const int candidate_column,uint8_t * dest,const ptrdiff_t dest_stride)619 void Tile::CompoundInterPrediction(
620     const Block& block, const uint8_t* const prediction_mask,
621     const ptrdiff_t prediction_mask_stride, const int prediction_width,
622     const int prediction_height, const int subsampling_x,
623     const int subsampling_y, const int candidate_row,
624     const int candidate_column, uint8_t* dest, const ptrdiff_t dest_stride) {
625   const PredictionParameters& prediction_parameters =
626       *block.bp->prediction_parameters;
627 
628   void* prediction[2];
629 #if LIBGAV1_MAX_BITDEPTH >= 10
630   const int bitdepth = sequence_header_.color_config.bitdepth;
631   if (bitdepth > 8) {
632     prediction[0] = block.scratch_buffer->prediction_buffer[0];
633     prediction[1] = block.scratch_buffer->prediction_buffer[1];
634   } else {
635 #endif
636     prediction[0] = block.scratch_buffer->compound_prediction_buffer_8bpp[0];
637     prediction[1] = block.scratch_buffer->compound_prediction_buffer_8bpp[1];
638 #if LIBGAV1_MAX_BITDEPTH >= 10
639   }
640 #endif
641 
642   switch (prediction_parameters.compound_prediction_type) {
643     case kCompoundPredictionTypeWedge:
644     case kCompoundPredictionTypeDiffWeighted:
645       GetMaskBlendFunc(dsp_, /*is_inter_intra=*/false,
646                        prediction_parameters.is_wedge_inter_intra,
647                        subsampling_x, subsampling_y)(
648           prediction[0], prediction[1],
649           /*prediction_stride=*/prediction_width, prediction_mask,
650           prediction_mask_stride, prediction_width, prediction_height, dest,
651           dest_stride);
652       break;
653     case kCompoundPredictionTypeDistance:
654       DistanceWeightedPrediction(prediction[0], prediction[1], prediction_width,
655                                  prediction_height, candidate_row,
656                                  candidate_column, dest, dest_stride);
657       break;
658     default:
659       assert(prediction_parameters.compound_prediction_type ==
660              kCompoundPredictionTypeAverage);
661       dsp_.average_blend(prediction[0], prediction[1], prediction_width,
662                          prediction_height, dest, dest_stride);
663       break;
664   }
665 }
666 
GetWarpParams(const Block & block,const Plane plane,const int prediction_width,const int prediction_height,const PredictionParameters & prediction_parameters,const ReferenceFrameType reference_type,bool * const is_local_valid,GlobalMotion * const global_motion_params,GlobalMotion * const local_warp_params) const667 GlobalMotion* Tile::GetWarpParams(
668     const Block& block, const Plane plane, const int prediction_width,
669     const int prediction_height,
670     const PredictionParameters& prediction_parameters,
671     const ReferenceFrameType reference_type, bool* const is_local_valid,
672     GlobalMotion* const global_motion_params,
673     GlobalMotion* const local_warp_params) const {
674   if (prediction_width < 8 || prediction_height < 8 ||
675       frame_header_.force_integer_mv == 1) {
676     return nullptr;
677   }
678   if (plane == kPlaneY) {
679     *is_local_valid =
680         prediction_parameters.motion_mode == kMotionModeLocalWarp &&
681         WarpEstimation(
682             prediction_parameters.num_warp_samples, DivideBy4(prediction_width),
683             DivideBy4(prediction_height), block.row4x4, block.column4x4,
684             block.bp->mv.mv[0], prediction_parameters.warp_estimate_candidates,
685             local_warp_params) &&
686         SetupShear(local_warp_params);
687   }
688   if (prediction_parameters.motion_mode == kMotionModeLocalWarp &&
689       *is_local_valid) {
690     return local_warp_params;
691   }
692   if (!IsScaled(reference_type)) {
693     GlobalMotionTransformationType global_motion_type =
694         (reference_type != kReferenceFrameIntra)
695             ? global_motion_params->type
696             : kNumGlobalMotionTransformationTypes;
697     const bool is_global_valid =
698         IsGlobalMvBlock(block.bp->is_global_mv_block, global_motion_type) &&
699         SetupShear(global_motion_params);
700     // Valid global motion type implies reference type can't be intra.
701     assert(!is_global_valid || reference_type != kReferenceFrameIntra);
702     if (is_global_valid) return global_motion_params;
703   }
704   return nullptr;
705 }
706 
InterPrediction(const Block & block,const Plane plane,const int x,const int y,const int prediction_width,const int prediction_height,int candidate_row,int candidate_column,bool * const is_local_valid,GlobalMotion * const local_warp_params)707 bool Tile::InterPrediction(const Block& block, const Plane plane, const int x,
708                            const int y, const int prediction_width,
709                            const int prediction_height, int candidate_row,
710                            int candidate_column, bool* const is_local_valid,
711                            GlobalMotion* const local_warp_params) {
712   const int bitdepth = sequence_header_.color_config.bitdepth;
713   const BlockParameters& bp = *block.bp;
714   const BlockParameters& bp_reference =
715       *block_parameters_holder_.Find(candidate_row, candidate_column);
716   const bool is_compound =
717       bp_reference.reference_frame[1] > kReferenceFrameIntra;
718   assert(bp.is_inter);
719   const bool is_inter_intra = bp.reference_frame[1] == kReferenceFrameIntra;
720 
721   const PredictionParameters& prediction_parameters =
722       *block.bp->prediction_parameters;
723   uint8_t* const dest = GetStartPoint(buffer_, plane, x, y, bitdepth);
724   const ptrdiff_t dest_stride = buffer_[plane].columns();  // In bytes.
725   for (int index = 0; index < 1 + static_cast<int>(is_compound); ++index) {
726     const ReferenceFrameType reference_type =
727         bp_reference.reference_frame[index];
728     GlobalMotion global_motion_params =
729         frame_header_.global_motion[reference_type];
730     GlobalMotion* warp_params =
731         GetWarpParams(block, plane, prediction_width, prediction_height,
732                       prediction_parameters, reference_type, is_local_valid,
733                       &global_motion_params, local_warp_params);
734     if (warp_params != nullptr) {
735       if (!BlockWarpProcess(block, plane, index, x, y, prediction_width,
736                             prediction_height, warp_params, is_compound,
737                             is_inter_intra, dest, dest_stride)) {
738         return false;
739       }
740     } else {
741       const int reference_index =
742           prediction_parameters.use_intra_block_copy
743               ? -1
744               : frame_header_.reference_frame_index[reference_type -
745                                                     kReferenceFrameLast];
746       if (!BlockInterPrediction(
747               block, plane, reference_index, bp_reference.mv.mv[index], x, y,
748               prediction_width, prediction_height, candidate_row,
749               candidate_column, block.scratch_buffer->prediction_buffer[index],
750               is_compound, is_inter_intra, dest, dest_stride)) {
751         return false;
752       }
753     }
754   }
755 
756   const int subsampling_x = subsampling_x_[plane];
757   const int subsampling_y = subsampling_y_[plane];
758   ptrdiff_t prediction_mask_stride = 0;
759   const uint8_t* prediction_mask = nullptr;
760   if (prediction_parameters.compound_prediction_type ==
761       kCompoundPredictionTypeWedge) {
762     const Array2D<uint8_t>& wedge_mask =
763         wedge_masks_[GetWedgeBlockSizeIndex(block.size)]
764                     [prediction_parameters.wedge_sign]
765                     [prediction_parameters.wedge_index];
766     prediction_mask = wedge_mask[0];
767     prediction_mask_stride = wedge_mask.columns();
768   } else if (prediction_parameters.compound_prediction_type ==
769              kCompoundPredictionTypeIntra) {
770     // 7.11.3.13. The inter intra masks are precomputed and stored as a set of
771     // look up tables.
772     assert(prediction_parameters.inter_intra_mode < kNumInterIntraModes);
773     prediction_mask =
774         kInterIntraMasks[prediction_parameters.inter_intra_mode]
775                         [GetInterIntraMaskLookupIndex(prediction_width)]
776                         [GetInterIntraMaskLookupIndex(prediction_height)];
777     prediction_mask_stride = prediction_width;
778   } else if (prediction_parameters.compound_prediction_type ==
779              kCompoundPredictionTypeDiffWeighted) {
780     if (plane == kPlaneY) {
781       assert(prediction_width >= 8);
782       assert(prediction_height >= 8);
783       dsp_.weight_mask[FloorLog2(prediction_width) - 3]
784                       [FloorLog2(prediction_height) - 3]
785                       [static_cast<int>(prediction_parameters.mask_is_inverse)](
786                           block.scratch_buffer->prediction_buffer[0],
787                           block.scratch_buffer->prediction_buffer[1],
788                           block.scratch_buffer->weight_mask,
789                           kMaxSuperBlockSizeInPixels);
790     }
791     prediction_mask = block.scratch_buffer->weight_mask;
792     prediction_mask_stride = kMaxSuperBlockSizeInPixels;
793   }
794 
795   if (is_compound) {
796     CompoundInterPrediction(block, prediction_mask, prediction_mask_stride,
797                             prediction_width, prediction_height, subsampling_x,
798                             subsampling_y, candidate_row, candidate_column,
799                             dest, dest_stride);
800   } else if (prediction_parameters.motion_mode == kMotionModeObmc) {
801     // Obmc mode is allowed only for single reference (!is_compound).
802     return ObmcPrediction(block, plane, prediction_width, prediction_height);
803   } else if (is_inter_intra) {
804     // InterIntra and obmc must be mutually exclusive.
805     InterIntraPrediction(
806         block.scratch_buffer->prediction_buffer[0], prediction_mask,
807         prediction_mask_stride, prediction_parameters, prediction_width,
808         prediction_height, subsampling_x, subsampling_y, dest, dest_stride);
809   }
810   return true;
811 }
812 
ObmcBlockPrediction(const Block & block,const MotionVector & mv,const Plane plane,const int reference_frame_index,const int width,const int height,const int x,const int y,const int candidate_row,const int candidate_column,const ObmcDirection blending_direction)813 bool Tile::ObmcBlockPrediction(const Block& block, const MotionVector& mv,
814                                const Plane plane,
815                                const int reference_frame_index, const int width,
816                                const int height, const int x, const int y,
817                                const int candidate_row,
818                                const int candidate_column,
819                                const ObmcDirection blending_direction) {
820   const int bitdepth = sequence_header_.color_config.bitdepth;
821   // Obmc's prediction needs to be clipped before blending with above/left
822   // prediction blocks.
823   // Obmc prediction is used only when is_compound is false. So it is safe to
824   // use prediction_buffer[1] as a temporary buffer for the Obmc prediction.
825   static_assert(sizeof(block.scratch_buffer->prediction_buffer[1]) >=
826                     64 * 64 * sizeof(uint16_t),
827                 "");
828   auto* const obmc_buffer =
829       reinterpret_cast<uint8_t*>(block.scratch_buffer->prediction_buffer[1]);
830   const ptrdiff_t obmc_buffer_stride =
831       (bitdepth == 8) ? width : width * sizeof(uint16_t);
832   if (!BlockInterPrediction(block, plane, reference_frame_index, mv, x, y,
833                             width, height, candidate_row, candidate_column,
834                             nullptr, false, false, obmc_buffer,
835                             obmc_buffer_stride)) {
836     return false;
837   }
838 
839   uint8_t* const prediction = GetStartPoint(buffer_, plane, x, y, bitdepth);
840   const ptrdiff_t prediction_stride = buffer_[plane].columns();
841   dsp_.obmc_blend[blending_direction](prediction, prediction_stride, width,
842                                       height, obmc_buffer, obmc_buffer_stride);
843   return true;
844 }
845 
ObmcPrediction(const Block & block,const Plane plane,const int width,const int height)846 bool Tile::ObmcPrediction(const Block& block, const Plane plane,
847                           const int width, const int height) {
848   const int subsampling_x = subsampling_x_[plane];
849   const int subsampling_y = subsampling_y_[plane];
850   if (block.top_available[kPlaneY] &&
851       !IsBlockSmallerThan8x8(block.residual_size[plane])) {
852     const int num_limit = std::min(uint8_t{4}, k4x4WidthLog2[block.size]);
853     const int column4x4_max =
854         std::min(block.column4x4 + block.width4x4, frame_header_.columns4x4);
855     const int candidate_row = block.row4x4 - 1;
856     const int block_start_y = MultiplyBy4(block.row4x4) >> subsampling_y;
857     int column4x4 = block.column4x4;
858     const int prediction_height = std::min(height >> 1, 32 >> subsampling_y);
859     for (int i = 0, step; i < num_limit && column4x4 < column4x4_max;
860          column4x4 += step) {
861       const int candidate_column = column4x4 | 1;
862       const BlockParameters& bp_top =
863           *block_parameters_holder_.Find(candidate_row, candidate_column);
864       const int candidate_block_size = bp_top.size;
865       step = Clip3(kNum4x4BlocksWide[candidate_block_size], 2, 16);
866       if (bp_top.reference_frame[0] > kReferenceFrameIntra) {
867         i++;
868         const int candidate_reference_frame_index =
869             frame_header_.reference_frame_index[bp_top.reference_frame[0] -
870                                                 kReferenceFrameLast];
871         const int prediction_width =
872             std::min(width, MultiplyBy4(step) >> subsampling_x);
873         if (!ObmcBlockPrediction(
874                 block, bp_top.mv.mv[0], plane, candidate_reference_frame_index,
875                 prediction_width, prediction_height,
876                 MultiplyBy4(column4x4) >> subsampling_x, block_start_y,
877                 candidate_row, candidate_column, kObmcDirectionVertical)) {
878           return false;
879         }
880       }
881     }
882   }
883 
884   if (block.left_available[kPlaneY]) {
885     const int num_limit = std::min(uint8_t{4}, k4x4HeightLog2[block.size]);
886     const int row4x4_max =
887         std::min(block.row4x4 + block.height4x4, frame_header_.rows4x4);
888     const int candidate_column = block.column4x4 - 1;
889     int row4x4 = block.row4x4;
890     const int block_start_x = MultiplyBy4(block.column4x4) >> subsampling_x;
891     const int prediction_width = std::min(width >> 1, 32 >> subsampling_x);
892     for (int i = 0, step; i < num_limit && row4x4 < row4x4_max;
893          row4x4 += step) {
894       const int candidate_row = row4x4 | 1;
895       const BlockParameters& bp_left =
896           *block_parameters_holder_.Find(candidate_row, candidate_column);
897       const int candidate_block_size = bp_left.size;
898       step = Clip3(kNum4x4BlocksHigh[candidate_block_size], 2, 16);
899       if (bp_left.reference_frame[0] > kReferenceFrameIntra) {
900         i++;
901         const int candidate_reference_frame_index =
902             frame_header_.reference_frame_index[bp_left.reference_frame[0] -
903                                                 kReferenceFrameLast];
904         const int prediction_height =
905             std::min(height, MultiplyBy4(step) >> subsampling_y);
906         if (!ObmcBlockPrediction(
907                 block, bp_left.mv.mv[0], plane, candidate_reference_frame_index,
908                 prediction_width, prediction_height, block_start_x,
909                 MultiplyBy4(row4x4) >> subsampling_y, candidate_row,
910                 candidate_column, kObmcDirectionHorizontal)) {
911           return false;
912         }
913       }
914     }
915   }
916   return true;
917 }
918 
DistanceWeightedPrediction(void * prediction_0,void * prediction_1,const int width,const int height,const int candidate_row,const int candidate_column,uint8_t * dest,ptrdiff_t dest_stride)919 void Tile::DistanceWeightedPrediction(void* prediction_0, void* prediction_1,
920                                       const int width, const int height,
921                                       const int candidate_row,
922                                       const int candidate_column, uint8_t* dest,
923                                       ptrdiff_t dest_stride) {
924   int distance[2];
925   int weight[2];
926   for (int reference = 0; reference < 2; ++reference) {
927     const BlockParameters& bp =
928         *block_parameters_holder_.Find(candidate_row, candidate_column);
929     // Note: distance[0] and distance[1] correspond to relative distance
930     // between current frame and reference frame [1] and [0], respectively.
931     distance[1 - reference] = std::min(
932         std::abs(static_cast<int>(
933             current_frame_.reference_info()
934                 ->relative_distance_from[bp.reference_frame[reference]])),
935         static_cast<int>(kMaxFrameDistance));
936   }
937   GetDistanceWeights(distance, weight);
938 
939   dsp_.distance_weighted_blend(prediction_0, prediction_1, weight[0], weight[1],
940                                width, height, dest, dest_stride);
941 }
942 
ScaleMotionVector(const MotionVector & mv,const Plane plane,const int reference_frame_index,const int x,const int y,int * const start_x,int * const start_y,int * const step_x,int * const step_y)943 void Tile::ScaleMotionVector(const MotionVector& mv, const Plane plane,
944                              const int reference_frame_index, const int x,
945                              const int y, int* const start_x,
946                              int* const start_y, int* const step_x,
947                              int* const step_y) {
948   const int reference_upscaled_width =
949       (reference_frame_index == -1)
950           ? frame_header_.upscaled_width
951           : reference_frames_[reference_frame_index]->upscaled_width();
952   const int reference_height =
953       (reference_frame_index == -1)
954           ? frame_header_.height
955           : reference_frames_[reference_frame_index]->frame_height();
956   assert(2 * frame_header_.width >= reference_upscaled_width &&
957          2 * frame_header_.height >= reference_height &&
958          frame_header_.width <= 16 * reference_upscaled_width &&
959          frame_header_.height <= 16 * reference_height);
960   const bool is_scaled_x = reference_upscaled_width != frame_header_.width;
961   const bool is_scaled_y = reference_height != frame_header_.height;
962   const int half_sample = 1 << (kSubPixelBits - 1);
963   int orig_x = (x << kSubPixelBits) + ((2 * mv.mv[1]) >> subsampling_x_[plane]);
964   int orig_y = (y << kSubPixelBits) + ((2 * mv.mv[0]) >> subsampling_y_[plane]);
965   const int rounding_offset =
966       DivideBy2(1 << (kScaleSubPixelBits - kSubPixelBits));
967   if (is_scaled_x) {
968     const int scale_x = ((reference_upscaled_width << kReferenceScaleShift) +
969                          DivideBy2(frame_header_.width)) /
970                         frame_header_.width;
971     *step_x = RightShiftWithRoundingSigned(
972         scale_x, kReferenceScaleShift - kScaleSubPixelBits);
973     orig_x += half_sample;
974     // When frame size is 4k and above, orig_x can be above 16 bits, scale_x can
975     // be up to 15 bits. So we use int64_t to hold base_x.
976     const int64_t base_x = static_cast<int64_t>(orig_x) * scale_x -
977                            (half_sample << kReferenceScaleShift);
978     *start_x =
979         RightShiftWithRoundingSigned(
980             base_x, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
981         rounding_offset;
982   } else {
983     *step_x = 1 << kScaleSubPixelBits;
984     *start_x = LeftShift(orig_x, 6) + rounding_offset;
985   }
986   if (is_scaled_y) {
987     const int scale_y = ((reference_height << kReferenceScaleShift) +
988                          DivideBy2(frame_header_.height)) /
989                         frame_header_.height;
990     *step_y = RightShiftWithRoundingSigned(
991         scale_y, kReferenceScaleShift - kScaleSubPixelBits);
992     orig_y += half_sample;
993     const int64_t base_y = static_cast<int64_t>(orig_y) * scale_y -
994                            (half_sample << kReferenceScaleShift);
995     *start_y =
996         RightShiftWithRoundingSigned(
997             base_y, kReferenceScaleShift + kSubPixelBits - kScaleSubPixelBits) +
998         rounding_offset;
999   } else {
1000     *step_y = 1 << kScaleSubPixelBits;
1001     *start_y = LeftShift(orig_y, 6) + rounding_offset;
1002   }
1003 }
1004 
1005 // static.
GetReferenceBlockPosition(const int reference_frame_index,const bool is_scaled,const int width,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int start_x,const int start_y,const int step_x,const int step_y,const int left_border,const int right_border,const int top_border,const int bottom_border,int * ref_block_start_x,int * ref_block_start_y,int * ref_block_end_x)1006 bool Tile::GetReferenceBlockPosition(
1007     const int reference_frame_index, const bool is_scaled, const int width,
1008     const int height, const int ref_start_x, const int ref_last_x,
1009     const int ref_start_y, const int ref_last_y, const int start_x,
1010     const int start_y, const int step_x, const int step_y,
1011     const int left_border, const int right_border, const int top_border,
1012     const int bottom_border, int* ref_block_start_x, int* ref_block_start_y,
1013     int* ref_block_end_x) {
1014   *ref_block_start_x = GetPixelPositionFromHighScale(start_x, 0, 0);
1015   *ref_block_start_y = GetPixelPositionFromHighScale(start_y, 0, 0);
1016   if (reference_frame_index == -1) {
1017     return false;
1018   }
1019   *ref_block_start_x -= kConvolveBorderLeftTop;
1020   *ref_block_start_y -= kConvolveBorderLeftTop;
1021   *ref_block_end_x = GetPixelPositionFromHighScale(start_x, step_x, width - 1) +
1022                      kConvolveBorderRight;
1023   int ref_block_end_y =
1024       GetPixelPositionFromHighScale(start_y, step_y, height - 1) +
1025       kConvolveBorderBottom;
1026   if (is_scaled) {
1027     const int block_height =
1028         (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1029          kScaleSubPixelBits) +
1030         kSubPixelTaps;
1031     ref_block_end_y = *ref_block_start_y + block_height - 1;
1032   }
1033   // Determines if we need to extend beyond the left/right/top/bottom border.
1034   return *ref_block_start_x < (ref_start_x - left_border) ||
1035          *ref_block_end_x > (ref_last_x + right_border) ||
1036          *ref_block_start_y < (ref_start_y - top_border) ||
1037          ref_block_end_y > (ref_last_y + bottom_border);
1038 }
1039 
1040 // Builds a block as the input for convolve, by copying the content of
1041 // reference frame (either a decoded reference frame, or current frame).
1042 // |block_extended_width| is the combined width of the block and its borders.
1043 template <typename Pixel>
BuildConvolveBlock(const Plane plane,const int reference_frame_index,const bool is_scaled,const int height,const int ref_start_x,const int ref_last_x,const int ref_start_y,const int ref_last_y,const int step_y,const int ref_block_start_x,const int ref_block_end_x,const int ref_block_start_y,uint8_t * block_buffer,ptrdiff_t convolve_buffer_stride,ptrdiff_t block_extended_width)1044 void Tile::BuildConvolveBlock(
1045     const Plane plane, const int reference_frame_index, const bool is_scaled,
1046     const int height, const int ref_start_x, const int ref_last_x,
1047     const int ref_start_y, const int ref_last_y, const int step_y,
1048     const int ref_block_start_x, const int ref_block_end_x,
1049     const int ref_block_start_y, uint8_t* block_buffer,
1050     ptrdiff_t convolve_buffer_stride, ptrdiff_t block_extended_width) {
1051   const YuvBuffer* const reference_buffer =
1052       (reference_frame_index == -1)
1053           ? current_frame_.buffer()
1054           : reference_frames_[reference_frame_index]->buffer();
1055   Array2DView<const Pixel> reference_block(
1056       reference_buffer->height(plane),
1057       reference_buffer->stride(plane) / sizeof(Pixel),
1058       reinterpret_cast<const Pixel*>(reference_buffer->data(plane)));
1059   auto* const block_head = reinterpret_cast<Pixel*>(block_buffer);
1060   convolve_buffer_stride /= sizeof(Pixel);
1061   int block_height = height + kConvolveBorderLeftTop + kConvolveBorderBottom;
1062   if (is_scaled) {
1063     block_height = (((height - 1) * step_y + (1 << kScaleSubPixelBits) - 1) >>
1064                     kScaleSubPixelBits) +
1065                    kSubPixelTaps;
1066   }
1067   const int copy_start_x = Clip3(ref_block_start_x, ref_start_x, ref_last_x);
1068   const int copy_start_y = Clip3(ref_block_start_y, ref_start_y, ref_last_y);
1069   const int copy_end_x = Clip3(ref_block_end_x, copy_start_x, ref_last_x);
1070   const int block_width = copy_end_x - copy_start_x + 1;
1071   const bool extend_left = ref_block_start_x < ref_start_x;
1072   const bool extend_right = ref_block_end_x > ref_last_x;
1073   const bool out_of_left = copy_start_x > ref_block_end_x;
1074   const bool out_of_right = copy_end_x < ref_block_start_x;
1075   if (out_of_left || out_of_right) {
1076     const int ref_x = out_of_left ? copy_start_x : copy_end_x;
1077     Pixel* buf_ptr = block_head;
1078     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1079       Memset(buf_ptr, reference_block[ref_y][ref_x], block_extended_width);
1080       if (ref_block_start_y + y >= ref_start_y &&
1081           ref_block_start_y + y < ref_last_y) {
1082         ++ref_y;
1083       }
1084       buf_ptr += convolve_buffer_stride;
1085     }
1086   } else {
1087     Pixel* buf_ptr = block_head;
1088     const int left_width = copy_start_x - ref_block_start_x;
1089     for (int y = 0, ref_y = copy_start_y; y < block_height; ++y) {
1090       if (extend_left) {
1091         Memset(buf_ptr, reference_block[ref_y][copy_start_x], left_width);
1092       }
1093       memcpy(buf_ptr + left_width, &reference_block[ref_y][copy_start_x],
1094              block_width * sizeof(Pixel));
1095       if (extend_right) {
1096         Memset(buf_ptr + left_width + block_width,
1097                reference_block[ref_y][copy_end_x],
1098                block_extended_width - left_width - block_width);
1099       }
1100       if (ref_block_start_y + y >= ref_start_y &&
1101           ref_block_start_y + y < ref_last_y) {
1102         ++ref_y;
1103       }
1104       buf_ptr += convolve_buffer_stride;
1105     }
1106   }
1107 }
1108 
BlockInterPrediction(const Block & block,const Plane plane,const int reference_frame_index,const MotionVector & mv,const int x,const int y,const int width,const int height,const int candidate_row,const int candidate_column,uint16_t * const prediction,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1109 bool Tile::BlockInterPrediction(
1110     const Block& block, const Plane plane, const int reference_frame_index,
1111     const MotionVector& mv, const int x, const int y, const int width,
1112     const int height, const int candidate_row, const int candidate_column,
1113     uint16_t* const prediction, const bool is_compound,
1114     const bool is_inter_intra, uint8_t* const dest,
1115     const ptrdiff_t dest_stride) {
1116   const BlockParameters& bp =
1117       *block_parameters_holder_.Find(candidate_row, candidate_column);
1118   int start_x;
1119   int start_y;
1120   int step_x;
1121   int step_y;
1122   ScaleMotionVector(mv, plane, reference_frame_index, x, y, &start_x, &start_y,
1123                     &step_x, &step_y);
1124   const int horizontal_filter_index = bp.interpolation_filter[1];
1125   const int vertical_filter_index = bp.interpolation_filter[0];
1126   const int subsampling_x = subsampling_x_[plane];
1127   const int subsampling_y = subsampling_y_[plane];
1128   // reference_frame_index equal to -1 indicates using current frame as
1129   // reference.
1130   const YuvBuffer* const reference_buffer =
1131       (reference_frame_index == -1)
1132           ? current_frame_.buffer()
1133           : reference_frames_[reference_frame_index]->buffer();
1134   const int reference_upscaled_width =
1135       (reference_frame_index == -1)
1136           ? MultiplyBy4(frame_header_.columns4x4)
1137           : reference_frames_[reference_frame_index]->upscaled_width();
1138   const int reference_height =
1139       (reference_frame_index == -1)
1140           ? MultiplyBy4(frame_header_.rows4x4)
1141           : reference_frames_[reference_frame_index]->frame_height();
1142   const int ref_start_x = 0;
1143   const int ref_last_x =
1144       SubsampledValue(reference_upscaled_width, subsampling_x) - 1;
1145   const int ref_start_y = 0;
1146   const int ref_last_y = SubsampledValue(reference_height, subsampling_y) - 1;
1147 
1148   const bool is_scaled = (reference_frame_index != -1) &&
1149                          (frame_header_.width != reference_upscaled_width ||
1150                           frame_header_.height != reference_height);
1151   const int bitdepth = sequence_header_.color_config.bitdepth;
1152   const int pixel_size = (bitdepth == 8) ? sizeof(uint8_t) : sizeof(uint16_t);
1153   int ref_block_start_x;
1154   int ref_block_start_y;
1155   int ref_block_end_x;
1156   const bool extend_block = GetReferenceBlockPosition(
1157       reference_frame_index, is_scaled, width, height, ref_start_x, ref_last_x,
1158       ref_start_y, ref_last_y, start_x, start_y, step_x, step_y,
1159       reference_buffer->left_border(plane),
1160       reference_buffer->right_border(plane),
1161       reference_buffer->top_border(plane),
1162       reference_buffer->bottom_border(plane), &ref_block_start_x,
1163       &ref_block_start_y, &ref_block_end_x);
1164 
1165   // In frame parallel mode, ensure that the reference block has been decoded
1166   // and available for referencing.
1167   if (reference_frame_index != -1 && frame_parallel_) {
1168     int reference_y_max;
1169     if (is_scaled) {
1170       // TODO(vigneshv): For now, we wait for the entire reference frame to be
1171       // decoded if we are using scaled references. This will eventually be
1172       // fixed.
1173       reference_y_max = reference_height;
1174     } else {
1175       reference_y_max =
1176           std::min(ref_block_start_y + height + kSubPixelTaps, ref_last_y);
1177       // For U and V planes with subsampling, we need to multiply
1178       // reference_y_max by 2 since we only track the progress of Y planes.
1179       reference_y_max = LeftShift(reference_y_max, subsampling_y);
1180     }
1181     if (reference_frame_progress_cache_[reference_frame_index] <
1182             reference_y_max &&
1183         !reference_frames_[reference_frame_index]->WaitUntil(
1184             reference_y_max,
1185             &reference_frame_progress_cache_[reference_frame_index])) {
1186       return false;
1187     }
1188   }
1189 
1190   const uint8_t* block_start = nullptr;
1191   ptrdiff_t convolve_buffer_stride;
1192   if (!extend_block) {
1193     const YuvBuffer* const reference_buffer =
1194         (reference_frame_index == -1)
1195             ? current_frame_.buffer()
1196             : reference_frames_[reference_frame_index]->buffer();
1197     convolve_buffer_stride = reference_buffer->stride(plane);
1198     if (reference_frame_index == -1 || is_scaled) {
1199       block_start = reference_buffer->data(plane) +
1200                     ref_block_start_y * reference_buffer->stride(plane) +
1201                     ref_block_start_x * pixel_size;
1202     } else {
1203       block_start = reference_buffer->data(plane) +
1204                     (ref_block_start_y + kConvolveBorderLeftTop) *
1205                         reference_buffer->stride(plane) +
1206                     (ref_block_start_x + kConvolveBorderLeftTop) * pixel_size;
1207     }
1208   } else {
1209     // The block width can be at most 2 times as much as current
1210     // block's width because of scaling.
1211     auto block_extended_width = Align<ptrdiff_t>(
1212         (2 * width + kConvolveBorderLeftTop + kConvolveBorderRight) *
1213             pixel_size,
1214         kMaxAlignment);
1215     convolve_buffer_stride = block.scratch_buffer->convolve_block_buffer_stride;
1216 #if LIBGAV1_MAX_BITDEPTH >= 10
1217     if (bitdepth > 8) {
1218       BuildConvolveBlock<uint16_t>(
1219           plane, reference_frame_index, is_scaled, height, ref_start_x,
1220           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1221           ref_block_end_x, ref_block_start_y,
1222           block.scratch_buffer->convolve_block_buffer.get(),
1223           convolve_buffer_stride, block_extended_width);
1224     } else {
1225 #endif
1226       BuildConvolveBlock<uint8_t>(
1227           plane, reference_frame_index, is_scaled, height, ref_start_x,
1228           ref_last_x, ref_start_y, ref_last_y, step_y, ref_block_start_x,
1229           ref_block_end_x, ref_block_start_y,
1230           block.scratch_buffer->convolve_block_buffer.get(),
1231           convolve_buffer_stride, block_extended_width);
1232 #if LIBGAV1_MAX_BITDEPTH >= 10
1233     }
1234 #endif
1235     block_start = block.scratch_buffer->convolve_block_buffer.get() +
1236                   (is_scaled ? 0
1237                              : kConvolveBorderLeftTop * convolve_buffer_stride +
1238                                    kConvolveBorderLeftTop * pixel_size);
1239   }
1240 
1241   void* const output =
1242       (is_compound || is_inter_intra) ? prediction : static_cast<void*>(dest);
1243   ptrdiff_t output_stride = (is_compound || is_inter_intra)
1244                                 ? /*prediction_stride=*/width
1245                                 : dest_stride;
1246 #if LIBGAV1_MAX_BITDEPTH >= 10
1247   // |is_inter_intra| calculations are written to the |prediction| buffer.
1248   // Unlike the |is_compound| calculations the output is Pixel and not uint16_t.
1249   // convolve_func() expects |output_stride| to be in bytes and not Pixels.
1250   // |prediction_stride| is in units of uint16_t. Adjust |output_stride| to
1251   // account for this.
1252   if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1253     output_stride *= 2;
1254   }
1255 #endif
1256   assert(output != nullptr);
1257   if (is_scaled) {
1258     dsp::ConvolveScaleFunc convolve_func = dsp_.convolve_scale[is_compound];
1259     assert(convolve_func != nullptr);
1260 
1261     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1262                   vertical_filter_index, start_x, start_y, step_x, step_y,
1263                   width, height, output, output_stride);
1264   } else {
1265     const int horizontal_filter_id = (start_x >> 6) & kSubPixelMask;
1266     const int vertical_filter_id = (start_y >> 6) & kSubPixelMask;
1267 
1268     dsp::ConvolveFunc convolve_func =
1269         dsp_.convolve[reference_frame_index == -1][is_compound]
1270                      [vertical_filter_id != 0][horizontal_filter_id != 0];
1271     assert(convolve_func != nullptr);
1272 
1273     convolve_func(block_start, convolve_buffer_stride, horizontal_filter_index,
1274                   vertical_filter_index, horizontal_filter_id,
1275                   vertical_filter_id, width, height, output, output_stride);
1276   }
1277   return true;
1278 }
1279 
BlockWarpProcess(const Block & block,const Plane plane,const int index,const int block_start_x,const int block_start_y,const int width,const int height,GlobalMotion * const warp_params,const bool is_compound,const bool is_inter_intra,uint8_t * const dest,const ptrdiff_t dest_stride)1280 bool Tile::BlockWarpProcess(const Block& block, const Plane plane,
1281                             const int index, const int block_start_x,
1282                             const int block_start_y, const int width,
1283                             const int height, GlobalMotion* const warp_params,
1284                             const bool is_compound, const bool is_inter_intra,
1285                             uint8_t* const dest, const ptrdiff_t dest_stride) {
1286   assert(width >= 8 && height >= 8);
1287   const BlockParameters& bp = *block.bp;
1288   const int reference_frame_index =
1289       frame_header_.reference_frame_index[bp.reference_frame[index] -
1290                                           kReferenceFrameLast];
1291   const uint8_t* const source =
1292       reference_frames_[reference_frame_index]->buffer()->data(plane);
1293   ptrdiff_t source_stride =
1294       reference_frames_[reference_frame_index]->buffer()->stride(plane);
1295   const int source_width =
1296       reference_frames_[reference_frame_index]->buffer()->width(plane);
1297   const int source_height =
1298       reference_frames_[reference_frame_index]->buffer()->height(plane);
1299   uint16_t* const prediction = block.scratch_buffer->prediction_buffer[index];
1300 
1301   // In frame parallel mode, ensure that the reference block has been decoded
1302   // and available for referencing.
1303   if (frame_parallel_) {
1304     int reference_y_max = -1;
1305     // Find out the maximum y-coordinate for warping.
1306     for (int start_y = block_start_y; start_y < block_start_y + height;
1307          start_y += 8) {
1308       for (int start_x = block_start_x; start_x < block_start_x + width;
1309            start_x += 8) {
1310         const int src_x = (start_x + 4) << subsampling_x_[plane];
1311         const int src_y = (start_y + 4) << subsampling_y_[plane];
1312         const int dst_y = src_x * warp_params->params[4] +
1313                           src_y * warp_params->params[5] +
1314                           warp_params->params[1];
1315         const int y4 = dst_y >> subsampling_y_[plane];
1316         const int iy4 = y4 >> kWarpedModelPrecisionBits;
1317         reference_y_max = std::max(iy4 + 8, reference_y_max);
1318       }
1319     }
1320     // For U and V planes with subsampling, we need to multiply reference_y_max
1321     // by 2 since we only track the progress of Y planes.
1322     reference_y_max = LeftShift(reference_y_max, subsampling_y_[plane]);
1323     if (reference_frame_progress_cache_[reference_frame_index] <
1324             reference_y_max &&
1325         !reference_frames_[reference_frame_index]->WaitUntil(
1326             reference_y_max,
1327             &reference_frame_progress_cache_[reference_frame_index])) {
1328       return false;
1329     }
1330   }
1331   if (is_compound) {
1332     dsp_.warp_compound(source, source_stride, source_width, source_height,
1333                        warp_params->params, subsampling_x_[plane],
1334                        subsampling_y_[plane], block_start_x, block_start_y,
1335                        width, height, warp_params->alpha, warp_params->beta,
1336                        warp_params->gamma, warp_params->delta, prediction,
1337                        /*prediction_stride=*/width);
1338   } else {
1339     void* const output = is_inter_intra ? static_cast<void*>(prediction) : dest;
1340     ptrdiff_t output_stride =
1341         is_inter_intra ? /*prediction_stride=*/width : dest_stride;
1342 #if LIBGAV1_MAX_BITDEPTH >= 10
1343     // |is_inter_intra| calculations are written to the |prediction| buffer.
1344     // Unlike the |is_compound| calculations the output is Pixel and not
1345     // uint16_t. warp_clip() expects |output_stride| to be in bytes and not
1346     // Pixels. |prediction_stride| is in units of uint16_t. Adjust
1347     // |output_stride| to account for this.
1348     if (is_inter_intra && sequence_header_.color_config.bitdepth > 8) {
1349       output_stride *= 2;
1350     }
1351 #endif
1352     dsp_.warp(source, source_stride, source_width, source_height,
1353               warp_params->params, subsampling_x_[plane], subsampling_y_[plane],
1354               block_start_x, block_start_y, width, height, warp_params->alpha,
1355               warp_params->beta, warp_params->gamma, warp_params->delta, output,
1356               output_stride);
1357   }
1358   return true;
1359 }
1360 
1361 }  // namespace libgav1
1362