1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "call/rtp_payload_params.h"
12 
13 #include <stddef.h>
14 
15 #include <algorithm>
16 
17 #include "absl/container/inlined_vector.h"
18 #include "absl/strings/match.h"
19 #include "absl/types/variant.h"
20 #include "api/video/video_timing.h"
21 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
22 #include "modules/video_coding/codecs/interface/common_constants.h"
23 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
24 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
25 #include "modules/video_coding/frame_dependencies_calculator.h"
26 #include "rtc_base/arraysize.h"
27 #include "rtc_base/checks.h"
28 #include "rtc_base/logging.h"
29 #include "rtc_base/random.h"
30 #include "rtc_base/time_utils.h"
31 
32 namespace webrtc {
33 
34 namespace {
PopulateRtpWithCodecSpecifics(const CodecSpecificInfo & info,absl::optional<int> spatial_index,RTPVideoHeader * rtp)35 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
36                                    absl::optional<int> spatial_index,
37                                    RTPVideoHeader* rtp) {
38   rtp->codec = info.codecType;
39   switch (info.codecType) {
40     case kVideoCodecVP8: {
41       auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
42       vp8_header.InitRTPVideoHeaderVP8();
43       vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
44       vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
45       vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
46       vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
47       rtp->simulcastIdx = spatial_index.value_or(0);
48       return;
49     }
50     case kVideoCodecVP9: {
51       auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
52       vp9_header.InitRTPVideoHeaderVP9();
53       vp9_header.inter_pic_predicted =
54           info.codecSpecific.VP9.inter_pic_predicted;
55       vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
56       vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
57       vp9_header.non_ref_for_inter_layer_pred =
58           info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
59       vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
60       vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
61       vp9_header.inter_layer_predicted =
62           info.codecSpecific.VP9.inter_layer_predicted;
63       vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
64       vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
65       vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
66       if (vp9_header.num_spatial_layers > 1) {
67         vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
68       } else {
69         vp9_header.spatial_idx = kNoSpatialIdx;
70       }
71       if (info.codecSpecific.VP9.ss_data_available) {
72         vp9_header.spatial_layer_resolution_present =
73             info.codecSpecific.VP9.spatial_layer_resolution_present;
74         if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
75           for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
76                ++i) {
77             vp9_header.width[i] = info.codecSpecific.VP9.width[i];
78             vp9_header.height[i] = info.codecSpecific.VP9.height[i];
79           }
80         }
81         vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
82       }
83 
84       vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
85       for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
86         vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
87       }
88       vp9_header.end_of_picture = info.codecSpecific.VP9.end_of_picture;
89       return;
90     }
91     case kVideoCodecH264: {
92       auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
93       h264_header.packetization_mode =
94           info.codecSpecific.H264.packetization_mode;
95       rtp->simulcastIdx = spatial_index.value_or(0);
96       return;
97     }
98     case kVideoCodecMultiplex:
99     case kVideoCodecGeneric:
100       rtp->codec = kVideoCodecGeneric;
101       rtp->simulcastIdx = spatial_index.value_or(0);
102       return;
103     default:
104       return;
105   }
106 }
107 
SetVideoTiming(const EncodedImage & image,VideoSendTiming * timing)108 void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
109   if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
110       image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
111     timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
112     return;
113   }
114 
115   timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
116       image.capture_time_ms_, image.timing_.encode_start_ms);
117   timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
118       image.capture_time_ms_, image.timing_.encode_finish_ms);
119   timing->packetization_finish_delta_ms = 0;
120   timing->pacer_exit_delta_ms = 0;
121   timing->network_timestamp_delta_ms = 0;
122   timing->network2_timestamp_delta_ms = 0;
123   timing->flags = image.timing_.flags;
124 }
125 }  // namespace
126 
RtpPayloadParams(const uint32_t ssrc,const RtpPayloadState * state,const WebRtcKeyValueConfig & trials)127 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
128                                    const RtpPayloadState* state,
129                                    const WebRtcKeyValueConfig& trials)
130     : ssrc_(ssrc),
131       generic_picture_id_experiment_(
132           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
133                            "Enabled")) {
134   for (auto& spatial_layer : last_shared_frame_id_)
135     spatial_layer.fill(-1);
136 
137   buffer_id_to_frame_id_.fill(-1);
138 
139   Random random(rtc::TimeMicros());
140   state_.picture_id =
141       state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
142   state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
143 }
144 
145 RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;
146 
~RtpPayloadParams()147 RtpPayloadParams::~RtpPayloadParams() {}
148 
GetRtpVideoHeader(const EncodedImage & image,const CodecSpecificInfo * codec_specific_info,int64_t shared_frame_id)149 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
150     const EncodedImage& image,
151     const CodecSpecificInfo* codec_specific_info,
152     int64_t shared_frame_id) {
153   RTPVideoHeader rtp_video_header;
154   if (codec_specific_info) {
155     PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
156                                   &rtp_video_header);
157   }
158   rtp_video_header.frame_type = image._frameType,
159   rtp_video_header.rotation = image.rotation_;
160   rtp_video_header.content_type = image.content_type_;
161   rtp_video_header.playout_delay = image.playout_delay_;
162   rtp_video_header.width = image._encodedWidth;
163   rtp_video_header.height = image._encodedHeight;
164   rtp_video_header.color_space = image.ColorSpace()
165                                      ? absl::make_optional(*image.ColorSpace())
166                                      : absl::nullopt;
167   SetVideoTiming(image, &rtp_video_header.video_timing);
168 
169   const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
170   const bool first_frame_in_picture =
171       (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
172           ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
173           : true;
174 
175   SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
176 
177   SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
178              &rtp_video_header);
179 
180   return rtp_video_header;
181 }
182 
ssrc() const183 uint32_t RtpPayloadParams::ssrc() const {
184   return ssrc_;
185 }
186 
state() const187 RtpPayloadState RtpPayloadParams::state() const {
188   return state_;
189 }
190 
SetCodecSpecific(RTPVideoHeader * rtp_video_header,bool first_frame_in_picture)191 void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
192                                         bool first_frame_in_picture) {
193   // Always set picture id. Set tl0_pic_idx iff temporal index is set.
194   if (first_frame_in_picture) {
195     state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
196   }
197   if (rtp_video_header->codec == kVideoCodecVP8) {
198     auto& vp8_header =
199         absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
200     vp8_header.pictureId = state_.picture_id;
201 
202     if (vp8_header.temporalIdx != kNoTemporalIdx) {
203       if (vp8_header.temporalIdx == 0) {
204         ++state_.tl0_pic_idx;
205       }
206       vp8_header.tl0PicIdx = state_.tl0_pic_idx;
207     }
208   }
209   if (rtp_video_header->codec == kVideoCodecVP9) {
210     auto& vp9_header =
211         absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
212     vp9_header.picture_id = state_.picture_id;
213 
214     // Note that in the case that we have no temporal layers but we do have
215     // spatial layers, packets will carry layering info with a temporal_idx of
216     // zero, and we then have to set and increment tl0_pic_idx.
217     if (vp9_header.temporal_idx != kNoTemporalIdx ||
218         vp9_header.spatial_idx != kNoSpatialIdx) {
219       if (first_frame_in_picture &&
220           (vp9_header.temporal_idx == 0 ||
221            vp9_header.temporal_idx == kNoTemporalIdx)) {
222         ++state_.tl0_pic_idx;
223       }
224       vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
225     }
226   }
227   if (generic_picture_id_experiment_ &&
228       rtp_video_header->codec == kVideoCodecGeneric) {
229     rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
230         .picture_id = state_.picture_id;
231   }
232 }
233 
234 RTPVideoHeader::GenericDescriptorInfo
GenericDescriptorFromFrameInfo(const GenericFrameInfo & frame_info,int64_t frame_id,VideoFrameType frame_type)235 RtpPayloadParams::GenericDescriptorFromFrameInfo(
236     const GenericFrameInfo& frame_info,
237     int64_t frame_id,
238     VideoFrameType frame_type) {
239   RTPVideoHeader::GenericDescriptorInfo generic;
240   generic.frame_id = frame_id;
241   generic.dependencies = dependencies_calculator_.FromBuffersUsage(
242       frame_type, frame_id, frame_info.encoder_buffers);
243   generic.chain_diffs =
244       chains_calculator_.From(frame_id, frame_info.part_of_chain);
245   generic.spatial_index = frame_info.spatial_id;
246   generic.temporal_index = frame_info.temporal_id;
247   generic.decode_target_indications = frame_info.decode_target_indications;
248   generic.active_decode_targets = frame_info.active_decode_targets;
249   return generic;
250 }
251 
SetGeneric(const CodecSpecificInfo * codec_specific_info,int64_t frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)252 void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
253                                   int64_t frame_id,
254                                   bool is_keyframe,
255                                   RTPVideoHeader* rtp_video_header) {
256   if (codec_specific_info && codec_specific_info->generic_frame_info &&
257       !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
258     if (is_keyframe) {
259       // Key frame resets all chains it is in.
260       chains_calculator_.Reset(
261           codec_specific_info->generic_frame_info->part_of_chain);
262     }
263     rtp_video_header->generic =
264         GenericDescriptorFromFrameInfo(*codec_specific_info->generic_frame_info,
265                                        frame_id, rtp_video_header->frame_type);
266     return;
267   }
268 
269   switch (rtp_video_header->codec) {
270     case VideoCodecType::kVideoCodecGeneric:
271       GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
272       return;
273     case VideoCodecType::kVideoCodecVP8:
274       if (codec_specific_info) {
275         Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
276                      is_keyframe, rtp_video_header);
277       }
278       return;
279     case VideoCodecType::kVideoCodecVP9:
280     case VideoCodecType::kVideoCodecAV1:
281       // TODO(philipel): Implement VP9 and AV1 to generic descriptor.
282       return;
283     case VideoCodecType::kVideoCodecH264:
284       if (codec_specific_info) {
285         H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
286                       is_keyframe, rtp_video_header);
287       }
288       return;
289     case VideoCodecType::kVideoCodecMultiplex:
290       return;
291   }
292   RTC_NOTREACHED() << "Unsupported codec.";
293 }
294 
GenericToGeneric(int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)295 void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
296                                         bool is_keyframe,
297                                         RTPVideoHeader* rtp_video_header) {
298   RTPVideoHeader::GenericDescriptorInfo& generic =
299       rtp_video_header->generic.emplace();
300 
301   generic.frame_id = shared_frame_id;
302 
303   if (is_keyframe) {
304     last_shared_frame_id_[0].fill(-1);
305   } else {
306     int64_t frame_id = last_shared_frame_id_[0][0];
307     RTC_DCHECK_NE(frame_id, -1);
308     RTC_DCHECK_LT(frame_id, shared_frame_id);
309     generic.dependencies.push_back(frame_id);
310   }
311 
312   last_shared_frame_id_[0][0] = shared_frame_id;
313 }
314 
H264ToGeneric(const CodecSpecificInfoH264 & h264_info,int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)315 void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
316                                      int64_t shared_frame_id,
317                                      bool is_keyframe,
318                                      RTPVideoHeader* rtp_video_header) {
319   const int temporal_index =
320       h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;
321 
322   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
323     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
324                            "used with generic frame descriptor.";
325     return;
326   }
327 
328   RTPVideoHeader::GenericDescriptorInfo& generic =
329       rtp_video_header->generic.emplace();
330 
331   generic.frame_id = shared_frame_id;
332   generic.temporal_index = temporal_index;
333 
334   if (is_keyframe) {
335     RTC_DCHECK_EQ(temporal_index, 0);
336     last_shared_frame_id_[/*spatial index*/ 0].fill(-1);
337     last_shared_frame_id_[/*spatial index*/ 0][temporal_index] =
338         shared_frame_id;
339     return;
340   }
341 
342   if (h264_info.base_layer_sync) {
343     int64_t tl0_frame_id = last_shared_frame_id_[/*spatial index*/ 0][0];
344 
345     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
346       if (last_shared_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
347         last_shared_frame_id_[/*spatial index*/ 0][i] = -1;
348       }
349     }
350 
351     RTC_DCHECK_GE(tl0_frame_id, 0);
352     RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
353     generic.dependencies.push_back(tl0_frame_id);
354   } else {
355     for (int i = 0; i <= temporal_index; ++i) {
356       int64_t frame_id = last_shared_frame_id_[/*spatial index*/ 0][i];
357 
358       if (frame_id != -1) {
359         RTC_DCHECK_LT(frame_id, shared_frame_id);
360         generic.dependencies.push_back(frame_id);
361       }
362     }
363   }
364 
365   last_shared_frame_id_[/*spatial_index*/ 0][temporal_index] = shared_frame_id;
366 }
367 
Vp8ToGeneric(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)368 void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
369                                     int64_t shared_frame_id,
370                                     bool is_keyframe,
371                                     RTPVideoHeader* rtp_video_header) {
372   const auto& vp8_header =
373       absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
374   const int spatial_index = 0;
375   const int temporal_index =
376       vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;
377 
378   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
379       spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
380     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
381                            "used with generic frame descriptor.";
382     return;
383   }
384 
385   RTPVideoHeader::GenericDescriptorInfo& generic =
386       rtp_video_header->generic.emplace();
387 
388   generic.frame_id = shared_frame_id;
389   generic.spatial_index = spatial_index;
390   generic.temporal_index = temporal_index;
391 
392   if (vp8_info.useExplicitDependencies) {
393     SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
394                           vp8_header.layerSync, &generic);
395   } else {
396     SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
397                                  spatial_index, temporal_index,
398                                  vp8_header.layerSync, &generic);
399   }
400 }
401 
SetDependenciesVp8Deprecated(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,int spatial_index,int temporal_index,bool layer_sync,RTPVideoHeader::GenericDescriptorInfo * generic)402 void RtpPayloadParams::SetDependenciesVp8Deprecated(
403     const CodecSpecificInfoVP8& vp8_info,
404     int64_t shared_frame_id,
405     bool is_keyframe,
406     int spatial_index,
407     int temporal_index,
408     bool layer_sync,
409     RTPVideoHeader::GenericDescriptorInfo* generic) {
410   RTC_DCHECK(!vp8_info.useExplicitDependencies);
411   RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
412   new_version_used_ = false;
413 
414   if (is_keyframe) {
415     RTC_DCHECK_EQ(temporal_index, 0);
416     last_shared_frame_id_[spatial_index].fill(-1);
417     last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
418     return;
419   }
420 
421   if (layer_sync) {
422     int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];
423 
424     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
425       if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
426         last_shared_frame_id_[spatial_index][i] = -1;
427       }
428     }
429 
430     RTC_DCHECK_GE(tl0_frame_id, 0);
431     RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
432     generic->dependencies.push_back(tl0_frame_id);
433   } else {
434     for (int i = 0; i <= temporal_index; ++i) {
435       int64_t frame_id = last_shared_frame_id_[spatial_index][i];
436 
437       if (frame_id != -1) {
438         RTC_DCHECK_LT(frame_id, shared_frame_id);
439         generic->dependencies.push_back(frame_id);
440       }
441     }
442   }
443 
444   last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
445 }
446 
SetDependenciesVp8New(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,bool layer_sync,RTPVideoHeader::GenericDescriptorInfo * generic)447 void RtpPayloadParams::SetDependenciesVp8New(
448     const CodecSpecificInfoVP8& vp8_info,
449     int64_t shared_frame_id,
450     bool is_keyframe,
451     bool layer_sync,
452     RTPVideoHeader::GenericDescriptorInfo* generic) {
453   RTC_DCHECK(vp8_info.useExplicitDependencies);
454   RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
455   new_version_used_ = true;
456 
457   if (is_keyframe) {
458     RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
459     buffer_id_to_frame_id_.fill(shared_frame_id);
460     return;
461   }
462 
463   constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
464 
465   RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
466   RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
467                 arraysize(vp8_info.referencedBuffers));
468 
469   for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
470     const size_t referenced_buffer = vp8_info.referencedBuffers[i];
471     RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
472     RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
473 
474     const int64_t dependency_frame_id =
475         buffer_id_to_frame_id_[referenced_buffer];
476     RTC_DCHECK_GE(dependency_frame_id, 0);
477     RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);
478 
479     const bool is_new_dependency =
480         std::find(generic->dependencies.begin(), generic->dependencies.end(),
481                   dependency_frame_id) == generic->dependencies.end();
482     if (is_new_dependency) {
483       generic->dependencies.push_back(dependency_frame_id);
484     }
485   }
486 
487   RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
488   for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
489     const size_t updated_id = vp8_info.updatedBuffers[i];
490     buffer_id_to_frame_id_[updated_id] = shared_frame_id;
491   }
492 
493   RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
494 }
495 
496 }  // namespace webrtc
497