1 /*
2  * Copyright 2019 The libgav1 Authors
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef LIBGAV1_SRC_UTILS_TYPES_H_
18 #define LIBGAV1_SRC_UTILS_TYPES_H_
19 
20 #include <array>
21 #include <cstddef>
22 #include <cstdint>
23 #include <memory>
24 
25 #include "src/utils/array_2d.h"
26 #include "src/utils/constants.h"
27 #include "src/utils/memory.h"
28 
29 namespace libgav1 {
30 
31 struct MotionVector : public Allocable {
32   static constexpr int kRow = 0;
33   static constexpr int kColumn = 1;
34 
35   MotionVector() = default;
36   MotionVector(const MotionVector& mv) = default;
37 
38   MotionVector& operator=(const MotionVector& rhs) {
39     mv32 = rhs.mv32;
40     return *this;
41   }
42 
43   bool operator==(const MotionVector& rhs) const { return mv32 == rhs.mv32; }
44 
45   union {
46     // Motion vectors will always fit in int16_t and using int16_t here instead
47     // of int saves significant memory since some of the frame sized structures
48     // store motion vectors.
49     int16_t mv[2];
50     // A uint32_t view into the |mv| array. Useful for cases where both the
51     // motion vectors have to be copied or compared with a single 32 bit
52     // instruction.
53     uint32_t mv32;
54   };
55 };
56 
57 union CompoundMotionVector {
58   CompoundMotionVector() = default;
59   CompoundMotionVector(const CompoundMotionVector& mv) = default;
60 
61   CompoundMotionVector& operator=(const CompoundMotionVector& rhs) {
62     mv64 = rhs.mv64;
63     return *this;
64   }
65 
66   bool operator==(const CompoundMotionVector& rhs) const {
67     return mv64 == rhs.mv64;
68   }
69 
70   MotionVector mv[2];
71   // A uint64_t view into the |mv| array. Useful for cases where all the motion
72   // vectors have to be copied or compared with a single 64 bit instruction.
73   uint64_t mv64;
74 };
75 
76 // Stores the motion information used for motion field estimation.
77 struct TemporalMotionField : public Allocable {
78   Array2D<MotionVector> mv;
79   Array2D<int8_t> reference_offset;
80 };
81 
// The contexts used while decoding the parts of an inter block's mode info
// that determine the y_mode field of BlockParameters.
//
// Each member mirrors one context variable of the spec, as noted per field.
struct MvContexts {
  int zero_mv;       // ZeroMvContext in the spec.
  int reference_mv;  // RefMvContext in the spec.
  int new_mv;        // NewMvContext in the spec.
};
92 
93 struct PaletteModeInfo {
94   uint8_t size[kNumPlaneTypes];
95   uint16_t color[kMaxPlanes][kMaxPaletteSize];
96 };
97 
98 // Stores the parameters used by the prediction process. The members of the
99 // struct are filled in when parsing the bitstream and used when the prediction
100 // is computed. The information in this struct is associated with a single
101 // block.
102 // While both BlockParameters and PredictionParameters store information
103 // pertaining to a Block, the only difference is that BlockParameters outlives
104 // the block itself (for example, some of the variables in BlockParameters are
105 // used to compute the context for reading elements in the subsequent blocks).
106 struct PredictionParameters : public Allocable {
107   // Restore the index in the unsorted mv stack from the least 3 bits of sorted
108   // |weight_index_stack|.
reference_mvPredictionParameters109   const MotionVector& reference_mv(int stack_index) const {
110     return ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)];
111   }
reference_mvPredictionParameters112   const MotionVector& reference_mv(int stack_index, int mv_index) const {
113     return compound_ref_mv_stack[7 - (weight_index_stack[stack_index] & 7)]
114         .mv[mv_index];
115   }
116 
IncreaseWeightPredictionParameters117   void IncreaseWeight(ptrdiff_t index, int weight) {
118     weight_index_stack[index] += weight << 3;
119   }
120 
SetWeightIndexStackEntryPredictionParameters121   void SetWeightIndexStackEntry(int index, int weight) {
122     weight_index_stack[index] = (weight << 3) + 7 - index;
123   }
124 
125   bool use_filter_intra;
126   FilterIntraPredictor filter_intra_mode;
127   int angle_delta[kNumPlaneTypes];
128   int8_t cfl_alpha_u;
129   int8_t cfl_alpha_v;
130   int max_luma_width;
131   int max_luma_height;
132   Array2D<uint8_t> color_index_map[kNumPlaneTypes];
133   bool use_intra_block_copy;
134   InterIntraMode inter_intra_mode;
135   bool is_wedge_inter_intra;
136   int wedge_index;
137   int wedge_sign;
138   bool mask_is_inverse;
139   MotionMode motion_mode;
140   CompoundPredictionType compound_prediction_type;
141   union {
142     // |ref_mv_stack| and |compound_ref_mv_stack| are not sorted after
143     // construction. reference_mv() must be called to get the correct element.
144     MotionVector ref_mv_stack[kMaxRefMvStackSize];
145     CompoundMotionVector compound_ref_mv_stack[kMaxRefMvStackSize];
146   };
147   // The least 3 bits of |weight_index_stack| store the index information, and
148   // the other bits store the weight. The index information is actually 7 -
149   // index to make the descending order sort stable (preserves the original
150   // order for elements with the same weight). Sorting an int16_t array is much
151   // faster than sorting a struct array with weight and index stored separately.
152   int16_t weight_index_stack[kMaxRefMvStackSize];
153   // In the spec, the weights of all the nearest mvs are incremented by a bonus
154   // weight which is larger than any natural weight, and later the weights of
155   // the mvs are compared with this bonus weight to determine their contexts. We
156   // replace this procedure by introducing |nearest_mv_count|, which records the
157   // count of the nearest mvs. Since all the nearest mvs are in the beginning of
158   // the mv stack, the index of a mv in the mv stack can be compared with
159   // |nearest_mv_count| to get that mv's context.
160   int nearest_mv_count;
161   int ref_mv_count;
162   int ref_mv_index;
163   MotionVector global_mv[2];
164   int num_warp_samples;
165   int warp_estimate_candidates[kMaxLeastSquaresSamples][4];
166 };
167 
168 // A lot of BlockParameters objects are created, so the smallest type is used
169 // for each field. The ranges of some fields are documented to justify why
170 // their types are large enough.
171 struct BlockParameters : public Allocable {
172   BlockSize size;
173   bool skip;
174   // True means that this block will use some default settings (that
175   // correspond to compound prediction) and so most of the mode info is
176   // skipped. False means that the mode info is not skipped.
177   bool skip_mode;
178   bool is_inter;
179   bool is_explicit_compound_type;  // comp_group_idx in the spec.
180   bool is_compound_type_average;   // compound_idx in the spec.
181   bool is_global_mv_block;
182   bool use_predicted_segment_id;  // only valid with temporal update enabled.
183   int8_t segment_id;              // segment_id is in the range [0, 7].
184   PredictionMode y_mode;
185   PredictionMode uv_mode;
186   TransformSize transform_size;
187   TransformSize uv_transform_size;
188   InterpolationFilter interpolation_filter[2];
189   ReferenceFrameType reference_frame[2];
190   // The index of this array is as follows:
191   //  0 - Y plane vertical filtering.
192   //  1 - Y plane horizontal filtering.
193   //  2 - U plane (both directions).
194   //  3 - V plane (both directions).
195   uint8_t deblock_filter_level[kFrameLfCount];
196   CompoundMotionVector mv;
197   PaletteModeInfo palette_mode_info;
198   // When |Tile::split_parse_and_decode_| is true, each block gets its own
199   // instance of |prediction_parameters|. When it is false, all the blocks point
200   // to |Tile::prediction_parameters_|. This field is valid only as long as the
201   // block is *being* decoded. The lifetime and usage of this field can be
202   // better understood by following its flow in tile.cc.
203   std::unique_ptr<PredictionParameters> prediction_parameters;
204 };
205 
206 // A five dimensional array used to store the wedge masks. The dimensions are:
207 //   - block_size_index (returned by GetWedgeBlockSizeIndex() in prediction.cc).
208 //   - flip_sign (0 or 1).
209 //   - wedge_index (0 to 15).
210 //   - each of those three dimensions is a 2d array of block_width by
211 //     block_height.
212 using WedgeMaskArray =
213     std::array<std::array<std::array<Array2D<uint8_t>, 16>, 2>, 9>;
214 
// Transformation types of the global motion model, stored in one byte. The
// last enumerator is the number of types.
enum GlobalMotionTransformationType : uint8_t {
  kGlobalMotionTransformationTypeIdentity = 0,
  kGlobalMotionTransformationTypeTranslation = 1,
  kGlobalMotionTransformationTypeRotZoom = 2,
  kGlobalMotionTransformationTypeAffine = 3,
  kNumGlobalMotionTransformationTypes = 4
};
222 
223 // Global motion and warped motion parameters. See the paper for more info:
224 // S. Parker, Y. Chen, D. Barker, P. de Rivaz, D. Mukherjee, "Global and locally
225 // adaptive warped motion compensation in video compression", Proc. IEEE
226 // International Conference on Image Processing (ICIP), pp. 275-279, Sep. 2017.
227 struct GlobalMotion {
228   GlobalMotionTransformationType type;
229   int32_t params[6];
230 
231   // Represent two shearing operations. Computed from |params| by SetupShear().
232   //
233   // The least significant six (= kWarpParamRoundingBits) bits are all zeros.
234   // (This means alpha, beta, gamma, and delta could be represented by a 10-bit
235   // signed integer.) The minimum value is INT16_MIN (= -32768) and the maximum
236   // value is 32704 = 0x7fc0, the largest int16_t value whose least significant
237   // six bits are all zeros.
238   //
239   // Valid warp parameters (as validated by SetupShear()) have smaller ranges.
240   // Their absolute values are less than 2^14 (= 16384). (This follows from
241   // the warpValid check at the end of Section 7.11.3.6.)
242   //
243   // NOTE: Section 7.11.3.6 of the spec allows a maximum value of 32768, which
244   // is outside the range of int16_t. When cast to int16_t, 32768 becomes
245   // -32768. This potential int16_t overflow does not matter because either
246   // 32768 or -32768 causes SetupShear() to return false,
247   int16_t alpha;
248   int16_t beta;
249   int16_t gamma;
250   int16_t delta;
251 };
252 
253 // Loop filter parameters:
254 //
255 // If level[0] and level[1] are both equal to 0, the loop filter process is
256 // not invoked.
257 //
258 // |sharpness| and |delta_enabled| are only used by the loop filter process.
259 //
260 // The |ref_deltas| and |mode_deltas| arrays are used not only by the loop
261 // filter process but also by the reference frame update and loading
262 // processes. The loop filter process uses |ref_deltas| and |mode_deltas| only
263 // when |delta_enabled| is true.
264 struct LoopFilter {
265   // Contains loop filter strength values in the range of [0, 63].
266   std::array<int8_t, kFrameLfCount> level;
267   // Indicates the sharpness level in the range of [0, 7].
268   int8_t sharpness;
269   // Whether the filter level depends on the mode and reference frame used to
270   // predict a block.
271   bool delta_enabled;
272   // Whether additional syntax elements were read that specify which mode and
273   // reference frame deltas are to be updated. loop_filter_delta_update field in
274   // Section 5.9.11 of the spec.
275   bool delta_update;
276   // Contains the adjustment needed for the filter level based on the chosen
277   // reference frame, in the range of [-64, 63].
278   std::array<int8_t, kNumReferenceFrameTypes> ref_deltas;
279   // Contains the adjustment needed for the filter level based on the chosen
280   // mode, in the range of [-64, 63].
281   std::array<int8_t, kLoopFilterMaxModeDeltas> mode_deltas;
282 };
283 
// A group of three delta settings; ObuFrameHeader holds one instance for
// delta_q and one for delta_lf.
struct Delta {
  bool present;
  uint8_t scale;
  bool multi;
};
289 
290 struct Cdef {
291   uint8_t damping;  // damping value from the spec + (bitdepth - 8).
292   uint8_t bits;
293   // All the strength values are the values from the spec and left shifted by
294   // (bitdepth - 8).
295   uint8_t y_primary_strength[kMaxCdefStrengths];
296   uint8_t y_secondary_strength[kMaxCdefStrengths];
297   uint8_t uv_primary_strength[kMaxCdefStrengths];
298   uint8_t uv_secondary_strength[kMaxCdefStrengths];
299 };
300 
301 struct TileInfo {
302   bool uniform_spacing;
303   int sb_rows;
304   int sb_columns;
305   int tile_count;
306   int tile_columns_log2;
307   int tile_columns;
308   int tile_column_start[kMaxTileColumns + 1];
309   // This field is not used by libgav1, but is populated for use by some
310   // hardware decoders. So it must not be removed.
311   int tile_column_width_in_superblocks[kMaxTileColumns + 1];
312   int tile_rows_log2;
313   int tile_rows;
314   int tile_row_start[kMaxTileRows + 1];
315   // This field is not used by libgav1, but is populated for use by some
316   // hardware decoders. So it must not be removed.
317   int tile_row_height_in_superblocks[kMaxTileRows + 1];
318   int16_t context_update_id;
319   uint8_t tile_size_bytes;
320 };
321 
322 struct LoopRestoration {
323   LoopRestorationType type[kMaxPlanes];
324   int unit_size_log2[kMaxPlanes];
325 };
326 
327 // Stores the quantization parameters of Section 5.9.12.
328 struct QuantizerParameters {
329   // base_index is in the range [0, 255].
330   uint8_t base_index;
331   int8_t delta_dc[kMaxPlanes];
332   // delta_ac[kPlaneY] is always 0.
333   int8_t delta_ac[kMaxPlanes];
334   bool use_matrix;
335   // The |matrix_level| array is used only when |use_matrix| is true.
336   // matrix_level[plane] specifies the level in the quantizer matrix that
337   // should be used for decoding |plane|. The quantizer matrix has 15 levels,
338   // from 0 to 14. The range of matrix_level[plane] is [0, 15]. If
339   // matrix_level[plane] is 15, the quantizer matrix is not used.
340   int8_t matrix_level[kMaxPlanes];
341 };
342 
// Segment features. The AV1 spec names the corresponding constants
// SEG_LVL_xxx.
enum SegmentFeature : uint8_t {
  kSegmentFeatureQuantizer = 0,
  kSegmentFeatureLoopFilterYVertical = 1,
  kSegmentFeatureLoopFilterYHorizontal = 2,
  kSegmentFeatureLoopFilterU = 3,
  kSegmentFeatureLoopFilterV = 4,
  kSegmentFeatureReferenceFrame = 5,
  kSegmentFeatureSkip = 6,
  kSegmentFeatureGlobalMv = 7,
  kSegmentFeatureMax = 8
};
356 
357 struct Segmentation {
358   // 5.11.14.
359   // Returns true if the feature is enabled in the segment.
FeatureActiveSegmentation360   bool FeatureActive(int segment_id, SegmentFeature feature) const {
361     return enabled && segment_id < kMaxSegments &&
362            feature_enabled[segment_id][feature];
363   }
364 
365   // Returns true if the feature is signed.
FeatureSignedSegmentation366   static bool FeatureSigned(SegmentFeature feature) {
367     // Only the first five segment features are signed, so this comparison
368     // suffices.
369     return feature <= kSegmentFeatureLoopFilterV;
370   }
371 
372   bool enabled;
373   bool update_map;
374   bool update_data;
375   bool temporal_update;
376   // True if the segment id will be read before the skip syntax element. False
377   // if the skip syntax element will be read first.
378   bool segment_id_pre_skip;
379   // The highest numbered segment id that has some enabled feature. Used as
380   // the upper bound for decoding segment ids.
381   int8_t last_active_segment_id;
382 
383   bool feature_enabled[kMaxSegments][kSegmentFeatureMax];
384   int16_t feature_data[kMaxSegments][kSegmentFeatureMax];
385   bool lossless[kMaxSegments];
386   // Cached values of get_qindex(1, segmentId), to be consumed by
387   // Tile::ReadTransformType(). The values are in the range [0, 255].
388   uint8_t qindex[kMaxSegments];
389 };
390 
// Film grain parameters, Section 6.8.20.
// Note: the film grain section of the spec names its variables after YCbCr,
// e.g. num_cb_points and num_cr_points. For consistency with the rest of the
// code, YUV naming is used here instead, i.e., num_u_points, num_v_points,
// etc.
struct FilmGrainParams {
  bool apply_grain;
  bool update_grain;
  bool chroma_scaling_from_luma;
  bool overlap_flag;
  bool clip_to_restricted_range;

  uint8_t num_y_points;  // [0, 14].
  uint8_t num_u_points;  // [0, 10].
  uint8_t num_v_points;  // [0, 10].
  // Values must be in [0, 255] and in increasing order. 10/12 bit /= 4 or 16.
  uint8_t point_y_value[14];
  uint8_t point_y_scaling[14];
  uint8_t point_u_value[10];
  uint8_t point_u_scaling[10];
  uint8_t point_v_value[10];
  uint8_t point_v_scaling[10];

  uint8_t chroma_scaling;              // [8, 11].
  uint8_t auto_regression_coeff_lag;   // [0, 3].
  int8_t auto_regression_coeff_y[24];  // [-128, 127]
  int8_t auto_regression_coeff_u[25];  // [-128, 127]
  int8_t auto_regression_coeff_v[25];  // [-128, 127]
  // The shift value determines the range of the auto regression coeffs:
  // 6: [-2, 2)
  // 7: [-1, 1)
  // 8: [-0.5, 0.5)
  // 9: [-0.25, 0.25)
  uint8_t auto_regression_shift;

  uint16_t grain_seed;
  int reference_index;
  int grain_scale_shift;
  // The multipliers are encoded as nonnegative values by adding 128 first.
  // Parsing subtracts the 128 again.
  int8_t u_multiplier;       // [-128, 127]
  int8_t u_luma_multiplier;  // [-128, 127]
  // The offsets are encoded as nonnegative values by adding 256 first.
  // Parsing subtracts the 256 again.
  int16_t u_offset;          // [-256, 255]
  int8_t v_multiplier;       // [-128, 127]
  int8_t v_luma_multiplier;  // [-128, 127]
  int16_t v_offset;          // [-256, 255]
};
439 
440 struct ObuFrameHeader {
441   uint16_t display_frame_id;
442   uint16_t current_frame_id;
443   int64_t frame_offset;
444   uint16_t expected_frame_id[kNumInterReferenceFrameTypes];
445   int32_t width;
446   int32_t height;
447   int32_t columns4x4;
448   int32_t rows4x4;
449   // The render size (render_width and render_height) is a hint to the
450   // application about the desired display size. It has no effect on the
451   // decoding process.
452   int32_t render_width;
453   int32_t render_height;
454   int32_t upscaled_width;
455   LoopRestoration loop_restoration;
456   uint32_t buffer_removal_time[kMaxOperatingPoints];
457   uint32_t frame_presentation_time;
458   // Note: global_motion[0] (for kReferenceFrameIntra) is not used.
459   std::array<GlobalMotion, kNumReferenceFrameTypes> global_motion;
460   TileInfo tile_info;
461   QuantizerParameters quantizer;
462   Segmentation segmentation;
463   bool show_existing_frame;
464   // frame_to_show is in the range [0, 7]. Only used if show_existing_frame is
465   // true.
466   int8_t frame_to_show;
467   FrameType frame_type;
468   bool show_frame;
469   bool showable_frame;
470   bool error_resilient_mode;
471   bool enable_cdf_update;
472   bool frame_size_override_flag;
473   // The order_hint syntax element in the uncompressed header. If
474   // show_existing_frame is false, the OrderHint variable in the spec is equal
475   // to this field, and so this field can be used in place of OrderHint when
476   // show_existing_frame is known to be false, such as during tile decoding.
477   uint8_t order_hint;
478   int8_t primary_reference_frame;
479   bool render_and_frame_size_different;
480   bool use_superres;
481   uint8_t superres_scale_denominator;
482   bool allow_screen_content_tools;
483   bool allow_intrabc;
484   bool frame_refs_short_signaling;
485   // A bitmask that specifies which reference frame slots will be updated with
486   // the current frame after it is decoded.
487   uint8_t refresh_frame_flags;
488   static_assert(sizeof(ObuFrameHeader::refresh_frame_flags) * 8 ==
489                     kNumReferenceFrameTypes,
490                 "");
491   bool found_reference;
492   int8_t force_integer_mv;
493   bool allow_high_precision_mv;
494   InterpolationFilter interpolation_filter;
495   bool is_motion_mode_switchable;
496   bool use_ref_frame_mvs;
497   bool enable_frame_end_update_cdf;
498   // True if all segments are losslessly encoded at the coded resolution.
499   bool coded_lossless;
500   // True if all segments are losslessly encoded at the upscaled resolution.
501   bool upscaled_lossless;
502   TxMode tx_mode;
503   // True means that the mode info for inter blocks contains the syntax
504   // element comp_mode that indicates whether to use single or compound
505   // prediction. False means that all inter blocks will use single prediction.
506   bool reference_mode_select;
507   // The frames to use for compound prediction when skip_mode is true.
508   ReferenceFrameType skip_mode_frame[2];
509   bool skip_mode_present;
510   bool reduced_tx_set;
511   bool allow_warped_motion;
512   Delta delta_q;
513   Delta delta_lf;
514   // A valid value of reference_frame_index[i] is in the range [0, 7]. -1
515   // indicates an invalid value.
516   //
517   // NOTE: When the frame is an intra frame (frame_type is kFrameKey or
518   // kFrameIntraOnly), reference_frame_index is not used and may be
519   // uninitialized.
520   int8_t reference_frame_index[kNumInterReferenceFrameTypes];
521   // The ref_order_hint[ i ] syntax element in the uncompressed header.
522   // Specifies the expected output order hint for each reference frame.
523   uint8_t reference_order_hint[kNumReferenceFrameTypes];
524   LoopFilter loop_filter;
525   Cdef cdef;
526   FilmGrainParams film_grain_params;
527 };
528 
529 // Structure used for traversing the partition tree.
530 struct PartitionTreeNode {
531   PartitionTreeNode() = default;
PartitionTreeNodePartitionTreeNode532   PartitionTreeNode(int row4x4, int column4x4, BlockSize block_size)
533       : row4x4(row4x4), column4x4(column4x4), block_size(block_size) {}
534   int row4x4 = -1;
535   int column4x4 = -1;
536   BlockSize block_size = kBlockInvalid;
537 };
538 
539 // Structure used for storing the transform parameters in a superblock.
540 struct TransformParameters {
541   TransformParameters() = default;
TransformParametersTransformParameters542   TransformParameters(TransformType type, int non_zero_coeff_count)
543       : type(type), non_zero_coeff_count(non_zero_coeff_count) {}
544   TransformType type;
545   int non_zero_coeff_count;
546 };
547 
548 }  // namespace libgav1
549 #endif  // LIBGAV1_SRC_UTILS_TYPES_H_
550