1 /*
2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/video_coding/decoding_state.h"
12 
13 #include "common_video/h264/h264_common.h"
14 #include "modules/include/module_common_types_public.h"
15 #include "modules/video_coding/frame_buffer.h"
16 #include "modules/video_coding/jitter_buffer_common.h"
17 #include "modules/video_coding/packet.h"
18 #include "rtc_base/logging.h"
19 
20 namespace webrtc {
21 
VCMDecodingState()22 VCMDecodingState::VCMDecodingState()
23     : sequence_num_(0),
24       time_stamp_(0),
25       picture_id_(kNoPictureId),
26       temporal_id_(kNoTemporalIdx),
27       tl0_pic_id_(kNoTl0PicIdx),
28       full_sync_(true),
29       in_initial_state_(true) {
30   memset(frame_decoded_, 0, sizeof(frame_decoded_));
31 }
32 
~VCMDecodingState()33 VCMDecodingState::~VCMDecodingState() {}
34 
Reset()35 void VCMDecodingState::Reset() {
36   // TODO(mikhal): Verify - not always would want to reset the sync
37   sequence_num_ = 0;
38   time_stamp_ = 0;
39   picture_id_ = kNoPictureId;
40   temporal_id_ = kNoTemporalIdx;
41   tl0_pic_id_ = kNoTl0PicIdx;
42   full_sync_ = true;
43   in_initial_state_ = true;
44   memset(frame_decoded_, 0, sizeof(frame_decoded_));
45   received_sps_.clear();
46   received_pps_.clear();
47 }
48 
time_stamp() const49 uint32_t VCMDecodingState::time_stamp() const {
50   return time_stamp_;
51 }
52 
sequence_num() const53 uint16_t VCMDecodingState::sequence_num() const {
54   return sequence_num_;
55 }
56 
IsOldFrame(const VCMFrameBuffer * frame) const57 bool VCMDecodingState::IsOldFrame(const VCMFrameBuffer* frame) const {
58   assert(frame != NULL);
59   if (in_initial_state_)
60     return false;
61   return !IsNewerTimestamp(frame->Timestamp(), time_stamp_);
62 }
63 
IsOldPacket(const VCMPacket * packet) const64 bool VCMDecodingState::IsOldPacket(const VCMPacket* packet) const {
65   assert(packet != NULL);
66   if (in_initial_state_)
67     return false;
68   return !IsNewerTimestamp(packet->timestamp, time_stamp_);
69 }
70 
SetState(const VCMFrameBuffer * frame)71 void VCMDecodingState::SetState(const VCMFrameBuffer* frame) {
72   assert(frame != NULL && frame->GetHighSeqNum() >= 0);
73   if (!UsingFlexibleMode(frame))
74     UpdateSyncState(frame);
75   sequence_num_ = static_cast<uint16_t>(frame->GetHighSeqNum());
76   time_stamp_ = frame->Timestamp();
77   picture_id_ = frame->PictureId();
78   temporal_id_ = frame->TemporalId();
79   tl0_pic_id_ = frame->Tl0PicId();
80 
81   for (const NaluInfo& nalu : frame->GetNaluInfos()) {
82     if (nalu.type == H264::NaluType::kPps) {
83       if (nalu.pps_id < 0) {
84         RTC_LOG(LS_WARNING) << "Received pps without pps id.";
85       } else if (nalu.sps_id < 0) {
86         RTC_LOG(LS_WARNING) << "Received pps without sps id.";
87       } else {
88         received_pps_[nalu.pps_id] = nalu.sps_id;
89       }
90     } else if (nalu.type == H264::NaluType::kSps) {
91       if (nalu.sps_id < 0) {
92         RTC_LOG(LS_WARNING) << "Received sps without sps id.";
93       } else {
94         received_sps_.insert(nalu.sps_id);
95       }
96     }
97   }
98 
99   if (UsingFlexibleMode(frame)) {
100     uint16_t frame_index = picture_id_ % kFrameDecodedLength;
101     if (in_initial_state_) {
102       frame_decoded_cleared_to_ = frame_index;
103     } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey) {
104       memset(frame_decoded_, 0, sizeof(frame_decoded_));
105       frame_decoded_cleared_to_ = frame_index;
106     } else {
107       if (AheadOfFramesDecodedClearedTo(frame_index)) {
108         while (frame_decoded_cleared_to_ != frame_index) {
109           frame_decoded_cleared_to_ =
110               (frame_decoded_cleared_to_ + 1) % kFrameDecodedLength;
111           frame_decoded_[frame_decoded_cleared_to_] = false;
112         }
113       }
114     }
115     frame_decoded_[frame_index] = true;
116   }
117 
118   in_initial_state_ = false;
119 }
120 
CopyFrom(const VCMDecodingState & state)121 void VCMDecodingState::CopyFrom(const VCMDecodingState& state) {
122   sequence_num_ = state.sequence_num_;
123   time_stamp_ = state.time_stamp_;
124   picture_id_ = state.picture_id_;
125   temporal_id_ = state.temporal_id_;
126   tl0_pic_id_ = state.tl0_pic_id_;
127   full_sync_ = state.full_sync_;
128   in_initial_state_ = state.in_initial_state_;
129   frame_decoded_cleared_to_ = state.frame_decoded_cleared_to_;
130   memcpy(frame_decoded_, state.frame_decoded_, sizeof(frame_decoded_));
131   received_sps_ = state.received_sps_;
132   received_pps_ = state.received_pps_;
133 }
134 
UpdateEmptyFrame(const VCMFrameBuffer * frame)135 bool VCMDecodingState::UpdateEmptyFrame(const VCMFrameBuffer* frame) {
136   bool empty_packet = frame->GetHighSeqNum() == frame->GetLowSeqNum();
137   if (in_initial_state_ && empty_packet) {
138     // Drop empty packets as long as we are in the initial state.
139     return true;
140   }
141   if ((empty_packet && ContinuousSeqNum(frame->GetHighSeqNum())) ||
142       ContinuousFrame(frame)) {
143     // Continuous empty packets or continuous frames can be dropped if we
144     // advance the sequence number.
145     sequence_num_ = frame->GetHighSeqNum();
146     time_stamp_ = frame->Timestamp();
147     return true;
148   }
149   return false;
150 }
151 
UpdateOldPacket(const VCMPacket * packet)152 void VCMDecodingState::UpdateOldPacket(const VCMPacket* packet) {
153   assert(packet != NULL);
154   if (packet->timestamp == time_stamp_) {
155     // Late packet belonging to the last decoded frame - make sure we update the
156     // last decoded sequence number.
157     sequence_num_ = LatestSequenceNumber(packet->seqNum, sequence_num_);
158   }
159 }
160 
SetSeqNum(uint16_t new_seq_num)161 void VCMDecodingState::SetSeqNum(uint16_t new_seq_num) {
162   sequence_num_ = new_seq_num;
163 }
164 
in_initial_state() const165 bool VCMDecodingState::in_initial_state() const {
166   return in_initial_state_;
167 }
168 
full_sync() const169 bool VCMDecodingState::full_sync() const {
170   return full_sync_;
171 }
172 
UpdateSyncState(const VCMFrameBuffer * frame)173 void VCMDecodingState::UpdateSyncState(const VCMFrameBuffer* frame) {
174   if (in_initial_state_)
175     return;
176   if (frame->TemporalId() == kNoTemporalIdx ||
177       frame->Tl0PicId() == kNoTl0PicIdx) {
178     full_sync_ = true;
179   } else if (frame->FrameType() == VideoFrameType::kVideoFrameKey ||
180              frame->LayerSync()) {
181     full_sync_ = true;
182   } else if (full_sync_) {
183     // Verify that we are still in sync.
184     // Sync will be broken if continuity is true for layers but not for the
185     // other methods (PictureId and SeqNum).
186     if (UsingPictureId(frame)) {
187       // First check for a valid tl0PicId.
188       if (frame->Tl0PicId() - tl0_pic_id_ > 1) {
189         full_sync_ = false;
190       } else {
191         full_sync_ = ContinuousPictureId(frame->PictureId());
192       }
193     } else {
194       full_sync_ =
195           ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum()));
196     }
197   }
198 }
199 
ContinuousFrame(const VCMFrameBuffer * frame) const200 bool VCMDecodingState::ContinuousFrame(const VCMFrameBuffer* frame) const {
201   // Check continuity based on the following hierarchy:
202   // - Temporal layers (stop here if out of sync).
203   // - Picture Id when available.
204   // - Sequence numbers.
205   // Return true when in initial state.
206   // Note that when a method is not applicable it will return false.
207   assert(frame != NULL);
208   // A key frame is always considered continuous as it doesn't refer to any
209   // frames and therefore won't introduce any errors even if prior frames are
210   // missing.
211   if (frame->FrameType() == VideoFrameType::kVideoFrameKey &&
212       HaveSpsAndPps(frame->GetNaluInfos())) {
213     return true;
214   }
215   // When in the initial state we always require a key frame to start decoding.
216   if (in_initial_state_)
217     return false;
218   if (ContinuousLayer(frame->TemporalId(), frame->Tl0PicId()))
219     return true;
220   // tl0picId is either not used, or should remain unchanged.
221   if (frame->Tl0PicId() != tl0_pic_id_)
222     return false;
223   // Base layers are not continuous or temporal layers are inactive.
224   // In the presence of temporal layers, check for Picture ID/sequence number
225   // continuity if sync can be restored by this frame.
226   if (!full_sync_ && !frame->LayerSync())
227     return false;
228   if (UsingPictureId(frame)) {
229     if (UsingFlexibleMode(frame)) {
230       return ContinuousFrameRefs(frame);
231     } else {
232       return ContinuousPictureId(frame->PictureId());
233     }
234   } else {
235     return ContinuousSeqNum(static_cast<uint16_t>(frame->GetLowSeqNum())) &&
236            HaveSpsAndPps(frame->GetNaluInfos());
237   }
238 }
239 
ContinuousPictureId(int picture_id) const240 bool VCMDecodingState::ContinuousPictureId(int picture_id) const {
241   int next_picture_id = picture_id_ + 1;
242   if (picture_id < picture_id_) {
243     // Wrap
244     if (picture_id_ >= 0x80) {
245       // 15 bits used for picture id
246       return ((next_picture_id & 0x7FFF) == picture_id);
247     } else {
248       // 7 bits used for picture id
249       return ((next_picture_id & 0x7F) == picture_id);
250     }
251   }
252   // No wrap
253   return (next_picture_id == picture_id);
254 }
255 
ContinuousSeqNum(uint16_t seq_num) const256 bool VCMDecodingState::ContinuousSeqNum(uint16_t seq_num) const {
257   return seq_num == static_cast<uint16_t>(sequence_num_ + 1);
258 }
259 
ContinuousLayer(int temporal_id,int tl0_pic_id) const260 bool VCMDecodingState::ContinuousLayer(int temporal_id, int tl0_pic_id) const {
261   // First, check if applicable.
262   if (temporal_id == kNoTemporalIdx || tl0_pic_id == kNoTl0PicIdx)
263     return false;
264   // If this is the first frame to use temporal layers, make sure we start
265   // from base.
266   else if (tl0_pic_id_ == kNoTl0PicIdx && temporal_id_ == kNoTemporalIdx &&
267            temporal_id == 0)
268     return true;
269 
270   // Current implementation: Look for base layer continuity.
271   if (temporal_id != 0)
272     return false;
273   return (static_cast<uint8_t>(tl0_pic_id_ + 1) == tl0_pic_id);
274 }
275 
ContinuousFrameRefs(const VCMFrameBuffer * frame) const276 bool VCMDecodingState::ContinuousFrameRefs(const VCMFrameBuffer* frame) const {
277   uint8_t num_refs = frame->CodecSpecific()->codecSpecific.VP9.num_ref_pics;
278   for (uint8_t r = 0; r < num_refs; ++r) {
279     uint16_t frame_ref = frame->PictureId() -
280                          frame->CodecSpecific()->codecSpecific.VP9.p_diff[r];
281     uint16_t frame_index = frame_ref % kFrameDecodedLength;
282     if (AheadOfFramesDecodedClearedTo(frame_index) ||
283         !frame_decoded_[frame_index]) {
284       return false;
285     }
286   }
287   return true;
288 }
289 
UsingPictureId(const VCMFrameBuffer * frame) const290 bool VCMDecodingState::UsingPictureId(const VCMFrameBuffer* frame) const {
291   return (frame->PictureId() != kNoPictureId && picture_id_ != kNoPictureId);
292 }
293 
UsingFlexibleMode(const VCMFrameBuffer * frame) const294 bool VCMDecodingState::UsingFlexibleMode(const VCMFrameBuffer* frame) const {
295   bool is_flexible_mode =
296       frame->CodecSpecific()->codecType == kVideoCodecVP9 &&
297       frame->CodecSpecific()->codecSpecific.VP9.flexible_mode;
298   if (is_flexible_mode && frame->PictureId() == kNoPictureId) {
299     RTC_LOG(LS_WARNING) << "Frame is marked as using flexible mode but no"
300                            "picture id is set.";
301     return false;
302   }
303   return is_flexible_mode;
304 }
305 
// TODO(philipel): Change how this check works; it practically
// limits the max p_diff to 64.
AheadOfFramesDecodedClearedTo(uint16_t index) const308 bool VCMDecodingState::AheadOfFramesDecodedClearedTo(uint16_t index) const {
309   // No way of knowing for sure if we are actually ahead of
310   // frame_decoded_cleared_to_. We just make the assumption
311   // that we are not trying to reference back to a very old
312   // index, but instead are referencing a newer index.
313   uint16_t diff =
314       index > frame_decoded_cleared_to_
315           ? kFrameDecodedLength - (index - frame_decoded_cleared_to_)
316           : frame_decoded_cleared_to_ - index;
317   return diff > kFrameDecodedLength / 2;
318 }
319 
HaveSpsAndPps(const std::vector<NaluInfo> & nalus) const320 bool VCMDecodingState::HaveSpsAndPps(const std::vector<NaluInfo>& nalus) const {
321   std::set<int> new_sps;
322   std::map<int, int> new_pps;
323   for (const NaluInfo& nalu : nalus) {
324     // Check if this nalu actually contains sps/pps information or dependencies.
325     if (nalu.sps_id == -1 && nalu.pps_id == -1)
326       continue;
327     switch (nalu.type) {
328       case H264::NaluType::kPps:
329         if (nalu.pps_id < 0) {
330           RTC_LOG(LS_WARNING) << "Received pps without pps id.";
331         } else if (nalu.sps_id < 0) {
332           RTC_LOG(LS_WARNING) << "Received pps without sps id.";
333         } else {
334           new_pps[nalu.pps_id] = nalu.sps_id;
335         }
336         break;
337       case H264::NaluType::kSps:
338         if (nalu.sps_id < 0) {
339           RTC_LOG(LS_WARNING) << "Received sps without sps id.";
340         } else {
341           new_sps.insert(nalu.sps_id);
342         }
343         break;
344       default: {
345         int needed_sps = -1;
346         auto pps_it = new_pps.find(nalu.pps_id);
347         if (pps_it != new_pps.end()) {
348           needed_sps = pps_it->second;
349         } else {
350           auto pps_it2 = received_pps_.find(nalu.pps_id);
351           if (pps_it2 == received_pps_.end()) {
352             return false;
353           }
354           needed_sps = pps_it2->second;
355         }
356         if (new_sps.find(needed_sps) == new_sps.end() &&
357             received_sps_.find(needed_sps) == received_sps_.end()) {
358           return false;
359         }
360         break;
361       }
362     }
363   }
364   return true;
365 }
366 
367 }  // namespace webrtc
368