1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "audio/channel_receive.h"
12 
13 #include <assert.h>
14 
15 #include <algorithm>
16 #include <map>
17 #include <memory>
18 #include <string>
19 #include <utility>
20 #include <vector>
21 
22 #include "api/crypto/frame_decryptor_interface.h"
23 #include "api/frame_transformer_interface.h"
24 #include "api/rtc_event_log/rtc_event_log.h"
25 #include "audio/audio_level.h"
26 #include "audio/channel_receive_frame_transformer_delegate.h"
27 #include "audio/channel_send.h"
28 #include "audio/utility/audio_frame_operations.h"
29 #include "logging/rtc_event_log/events/rtc_event_audio_playout.h"
30 #include "modules/audio_coding/acm2/acm_receiver.h"
31 #include "modules/audio_coding/audio_network_adaptor/include/audio_network_adaptor_config.h"
32 #include "modules/audio_device/include/audio_device.h"
33 #include "modules/pacing/packet_router.h"
34 #include "modules/rtp_rtcp/include/receive_statistics.h"
35 #include "modules/rtp_rtcp/include/remote_ntp_time_estimator.h"
36 #include "modules/rtp_rtcp/source/absolute_capture_time_receiver.h"
37 #include "modules/rtp_rtcp/source/rtp_header_extensions.h"
38 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
39 #include "modules/rtp_rtcp/source/rtp_rtcp_config.h"
40 #include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h"
41 #include "modules/utility/include/process_thread.h"
42 #include "rtc_base/checks.h"
43 #include "rtc_base/format_macros.h"
44 #include "rtc_base/location.h"
45 #include "rtc_base/logging.h"
46 #include "rtc_base/numerics/safe_minmax.h"
47 #include "rtc_base/race_checker.h"
48 #include "rtc_base/synchronization/mutex.h"
49 #include "rtc_base/thread_checker.h"
50 #include "rtc_base/time_utils.h"
51 #include "system_wrappers/include/metrics.h"
52 
53 namespace webrtc {
54 namespace voe {
55 
56 namespace {
57 
// Duration, in seconds, credited to the output level meter for every frame
// pulled from the ACM (frames are 10 ms long).
constexpr double kAudioSampleDurationSeconds{0.01};

// Video Sync.
// Lower/upper limits in milliseconds; presumably the accepted range for the
// minimum playout delay (the code using them is outside this section).
constexpr int kVoiceEngineMinMinPlayoutDelayMs{0};
constexpr int kVoiceEngineMaxMinPlayoutDelayMs{10000};
63 
AcmConfig(NetEqFactory * neteq_factory,rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,absl::optional<AudioCodecPairId> codec_pair_id,size_t jitter_buffer_max_packets,bool jitter_buffer_fast_playout)64 AudioCodingModule::Config AcmConfig(
65     NetEqFactory* neteq_factory,
66     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
67     absl::optional<AudioCodecPairId> codec_pair_id,
68     size_t jitter_buffer_max_packets,
69     bool jitter_buffer_fast_playout) {
70   AudioCodingModule::Config acm_config;
71   acm_config.neteq_factory = neteq_factory;
72   acm_config.decoder_factory = decoder_factory;
73   acm_config.neteq_config.codec_pair_id = codec_pair_id;
74   acm_config.neteq_config.max_packets_in_buffer = jitter_buffer_max_packets;
75   acm_config.neteq_config.enable_fast_accelerate = jitter_buffer_fast_playout;
76   acm_config.neteq_config.enable_muted_state = true;
77 
78   return acm_config;
79 }
80 
81 class ChannelReceive : public ChannelReceiveInterface {
82  public:
83   // Used for receive streams.
84   ChannelReceive(
85       Clock* clock,
86       ProcessThread* module_process_thread,
87       NetEqFactory* neteq_factory,
88       AudioDeviceModule* audio_device_module,
89       Transport* rtcp_send_transport,
90       RtcEventLog* rtc_event_log,
91       uint32_t local_ssrc,
92       uint32_t remote_ssrc,
93       size_t jitter_buffer_max_packets,
94       bool jitter_buffer_fast_playout,
95       int jitter_buffer_min_delay_ms,
96       bool jitter_buffer_enable_rtx_handling,
97       rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
98       absl::optional<AudioCodecPairId> codec_pair_id,
99       rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
100       const webrtc::CryptoOptions& crypto_options,
101       rtc::scoped_refptr<FrameTransformerInterface> frame_transformer);
102   ~ChannelReceive() override;
103 
104   void SetSink(AudioSinkInterface* sink) override;
105 
106   void SetReceiveCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
107 
108   // API methods
109 
110   void StartPlayout() override;
111   void StopPlayout() override;
112 
113   // Codecs
114   absl::optional<std::pair<int, SdpAudioFormat>> GetReceiveCodec()
115       const override;
116 
117   void ReceivedRTCPPacket(const uint8_t* data, size_t length) override;
118 
119   // RtpPacketSinkInterface.
120   void OnRtpPacket(const RtpPacketReceived& packet) override;
121 
122   // Muting, Volume and Level.
123   void SetChannelOutputVolumeScaling(float scaling) override;
124   int GetSpeechOutputLevelFullRange() const override;
125   // See description of "totalAudioEnergy" in the WebRTC stats spec:
126   // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats-totalaudioenergy
127   double GetTotalOutputEnergy() const override;
128   double GetTotalOutputDuration() const override;
129 
130   // Stats.
131   NetworkStatistics GetNetworkStatistics() const override;
132   AudioDecodingCallStats GetDecodingCallStatistics() const override;
133 
134   // Audio+Video Sync.
135   uint32_t GetDelayEstimate() const override;
136   void SetMinimumPlayoutDelay(int delayMs) override;
137   bool GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp,
138                               int64_t* time_ms) const override;
139   void SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms,
140                                          int64_t time_ms) override;
141   absl::optional<int64_t> GetCurrentEstimatedPlayoutNtpTimestampMs(
142       int64_t now_ms) const override;
143 
144   // Audio quality.
145   bool SetBaseMinimumPlayoutDelayMs(int delay_ms) override;
146   int GetBaseMinimumPlayoutDelayMs() const override;
147 
148   // Produces the transport-related timestamps; current_delay_ms is left unset.
149   absl::optional<Syncable::Info> GetSyncInfo() const override;
150 
151   void RegisterReceiverCongestionControlObjects(
152       PacketRouter* packet_router) override;
153   void ResetReceiverCongestionControlObjects() override;
154 
155   CallReceiveStatistics GetRTCPStatistics() const override;
156   void SetNACKStatus(bool enable, int maxNumberOfPackets) override;
157 
158   AudioMixer::Source::AudioFrameInfo GetAudioFrameWithInfo(
159       int sample_rate_hz,
160       AudioFrame* audio_frame) override;
161 
162   int PreferredSampleRate() const override;
163 
164   // Associate to a send channel.
165   // Used for obtaining RTT for a receive-only channel.
166   void SetAssociatedSendChannel(const ChannelSendInterface* channel) override;
167 
168   // Sets a frame transformer between the depacketizer and the decoder, to
169   // transform the received frames before decoding them.
170   void SetDepacketizerToDecoderFrameTransformer(
171       rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)
172       override;
173 
174  private:
175   void ReceivePacket(const uint8_t* packet,
176                      size_t packet_length,
177                      const RTPHeader& header);
178   int ResendPackets(const uint16_t* sequence_numbers, int length);
179   void UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms);
180 
181   int GetRtpTimestampRateHz() const;
182   int64_t GetRTT() const;
183 
184   void OnReceivedPayloadData(rtc::ArrayView<const uint8_t> payload,
185                              const RTPHeader& rtpHeader);
186 
187   void InitFrameTransformerDelegate(
188       rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer);
189 
Playing() const190   bool Playing() const {
191     MutexLock lock(&playing_lock_);
192     return playing_;
193   }
194 
195   // Thread checkers document and lock usage of some methods to specific threads
196   // we know about. The goal is to eventually split up voe::ChannelReceive into
197   // parts with single-threaded semantics, and thereby reduce the need for
198   // locks.
199   rtc::ThreadChecker worker_thread_checker_;
200   rtc::ThreadChecker module_process_thread_checker_;
201   // Methods accessed from audio and video threads are checked for sequential-
202   // only access. We don't necessarily own and control these threads, so thread
203   // checkers cannot be used. E.g. Chromium may transfer "ownership" from one
204   // audio thread to another, but access is still sequential.
205   rtc::RaceChecker audio_thread_race_checker_;
206   rtc::RaceChecker video_capture_thread_race_checker_;
207   Mutex callback_mutex_;
208   Mutex volume_settings_mutex_;
209 
210   mutable Mutex playing_lock_;
211   bool playing_ RTC_GUARDED_BY(&playing_lock_) = false;
212 
213   RtcEventLog* const event_log_;
214 
215   // Indexed by payload type.
216   std::map<uint8_t, int> payload_type_frequencies_;
217 
218   std::unique_ptr<ReceiveStatistics> rtp_receive_statistics_;
219   std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_;
220   const uint32_t remote_ssrc_;
221 
222   // Info for GetSyncInfo is updated on network or worker thread, and queried on
223   // the worker thread.
224   mutable Mutex sync_info_lock_;
225   absl::optional<uint32_t> last_received_rtp_timestamp_
226       RTC_GUARDED_BY(&sync_info_lock_);
227   absl::optional<int64_t> last_received_rtp_system_time_ms_
228       RTC_GUARDED_BY(&sync_info_lock_);
229 
230   // The AcmReceiver is thread safe, using its own lock.
231   acm2::AcmReceiver acm_receiver_;
232   AudioSinkInterface* audio_sink_ = nullptr;
233   AudioLevel _outputAudioLevel;
234 
235   RemoteNtpTimeEstimator ntp_estimator_ RTC_GUARDED_BY(ts_stats_lock_);
236 
237   // Timestamp of the audio pulled from NetEq.
238   absl::optional<uint32_t> jitter_buffer_playout_timestamp_;
239 
240   mutable Mutex video_sync_lock_;
241   uint32_t playout_timestamp_rtp_ RTC_GUARDED_BY(video_sync_lock_);
242   absl::optional<int64_t> playout_timestamp_rtp_time_ms_
243       RTC_GUARDED_BY(video_sync_lock_);
244   uint32_t playout_delay_ms_ RTC_GUARDED_BY(video_sync_lock_);
245   absl::optional<int64_t> playout_timestamp_ntp_
246       RTC_GUARDED_BY(video_sync_lock_);
247   absl::optional<int64_t> playout_timestamp_ntp_time_ms_
248       RTC_GUARDED_BY(video_sync_lock_);
249 
250   mutable Mutex ts_stats_lock_;
251 
252   std::unique_ptr<rtc::TimestampWrapAroundHandler> rtp_ts_wraparound_handler_;
253   // The rtp timestamp of the first played out audio frame.
254   int64_t capture_start_rtp_time_stamp_;
255   // The capture ntp time (in local timebase) of the first played out audio
256   // frame.
257   int64_t capture_start_ntp_time_ms_ RTC_GUARDED_BY(ts_stats_lock_);
258 
259   // uses
260   ProcessThread* _moduleProcessThreadPtr;
261   AudioDeviceModule* _audioDeviceModulePtr;
262   float _outputGain RTC_GUARDED_BY(volume_settings_mutex_);
263 
264   // An associated send channel.
265   mutable Mutex assoc_send_channel_lock_;
266   const ChannelSendInterface* associated_send_channel_
267       RTC_GUARDED_BY(assoc_send_channel_lock_);
268 
269   PacketRouter* packet_router_ = nullptr;
270 
271   rtc::ThreadChecker construction_thread_;
272 
273   // E2EE Audio Frame Decryption
274   rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor_;
275   webrtc::CryptoOptions crypto_options_;
276 
277   webrtc::AbsoluteCaptureTimeReceiver absolute_capture_time_receiver_;
278 
279   rtc::scoped_refptr<ChannelReceiveFrameTransformerDelegate>
280       frame_transformer_delegate_;
281 };
282 
OnReceivedPayloadData(rtc::ArrayView<const uint8_t> payload,const RTPHeader & rtpHeader)283 void ChannelReceive::OnReceivedPayloadData(
284     rtc::ArrayView<const uint8_t> payload,
285     const RTPHeader& rtpHeader) {
286   if (!Playing()) {
287     // Avoid inserting into NetEQ when we are not playing. Count the
288     // packet as discarded.
289     return;
290   }
291 
292   // Push the incoming payload (parsed and ready for decoding) into the ACM
293   if (acm_receiver_.InsertPacket(rtpHeader, payload) != 0) {
294     RTC_DLOG(LS_ERROR) << "ChannelReceive::OnReceivedPayloadData() unable to "
295                           "push data to the ACM";
296     return;
297   }
298 
299   int64_t round_trip_time = 0;
300   rtp_rtcp_->RTT(remote_ssrc_, &round_trip_time, NULL, NULL, NULL);
301 
302   std::vector<uint16_t> nack_list = acm_receiver_.GetNackList(round_trip_time);
303   if (!nack_list.empty()) {
304     // Can't use nack_list.data() since it's not supported by all
305     // compilers.
306     ResendPackets(&(nack_list[0]), static_cast<int>(nack_list.size()));
307   }
308 }
309 
InitFrameTransformerDelegate(rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)310 void ChannelReceive::InitFrameTransformerDelegate(
311     rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
312   RTC_DCHECK(frame_transformer);
313   RTC_DCHECK(!frame_transformer_delegate_);
314 
315   // Pass a callback to ChannelReceive::OnReceivedPayloadData, to be called by
316   // the delegate to receive transformed audio.
317   ChannelReceiveFrameTransformerDelegate::ReceiveFrameCallback
318       receive_audio_callback = [this](rtc::ArrayView<const uint8_t> packet,
319                                       const RTPHeader& header) {
320         OnReceivedPayloadData(packet, header);
321       };
322   frame_transformer_delegate_ =
323       new rtc::RefCountedObject<ChannelReceiveFrameTransformerDelegate>(
324           std::move(receive_audio_callback), std::move(frame_transformer),
325           rtc::Thread::Current());
326   frame_transformer_delegate_->Init();
327 }
328 
GetAudioFrameWithInfo(int sample_rate_hz,AudioFrame * audio_frame)329 AudioMixer::Source::AudioFrameInfo ChannelReceive::GetAudioFrameWithInfo(
330     int sample_rate_hz,
331     AudioFrame* audio_frame) {
332   RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
333   audio_frame->sample_rate_hz_ = sample_rate_hz;
334 
335   event_log_->Log(std::make_unique<RtcEventAudioPlayout>(remote_ssrc_));
336 
337   // Get 10ms raw PCM data from the ACM (mixer limits output frequency)
338   bool muted;
339   if (acm_receiver_.GetAudio(audio_frame->sample_rate_hz_, audio_frame,
340                              &muted) == -1) {
341     RTC_DLOG(LS_ERROR)
342         << "ChannelReceive::GetAudioFrame() PlayoutData10Ms() failed!";
343     // In all likelihood, the audio in this frame is garbage. We return an
344     // error so that the audio mixer module doesn't add it to the mix. As
345     // a result, it won't be played out and the actions skipped here are
346     // irrelevant.
347     return AudioMixer::Source::AudioFrameInfo::kError;
348   }
349 
350   if (muted) {
351     // TODO(henrik.lundin): We should be able to do better than this. But we
352     // will have to go through all the cases below where the audio samples may
353     // be used, and handle the muted case in some way.
354     AudioFrameOperations::Mute(audio_frame);
355   }
356 
357   {
358     // Pass the audio buffers to an optional sink callback, before applying
359     // scaling/panning, as that applies to the mix operation.
360     // External recipients of the audio (e.g. via AudioTrack), will do their
361     // own mixing/dynamic processing.
362     MutexLock lock(&callback_mutex_);
363     if (audio_sink_) {
364       AudioSinkInterface::Data data(
365           audio_frame->data(), audio_frame->samples_per_channel_,
366           audio_frame->sample_rate_hz_, audio_frame->num_channels_,
367           audio_frame->timestamp_);
368       audio_sink_->OnData(data);
369     }
370   }
371 
372   float output_gain = 1.0f;
373   {
374     MutexLock lock(&volume_settings_mutex_);
375     output_gain = _outputGain;
376   }
377 
378   // Output volume scaling
379   if (output_gain < 0.99f || output_gain > 1.01f) {
380     // TODO(solenberg): Combine with mute state - this can cause clicks!
381     AudioFrameOperations::ScaleWithSat(output_gain, audio_frame);
382   }
383 
384   // Measure audio level (0-9)
385   // TODO(henrik.lundin) Use the |muted| information here too.
386   // TODO(deadbeef): Use RmsLevel for |_outputAudioLevel| (see
387   // https://crbug.com/webrtc/7517).
388   _outputAudioLevel.ComputeLevel(*audio_frame, kAudioSampleDurationSeconds);
389 
390   if (capture_start_rtp_time_stamp_ < 0 && audio_frame->timestamp_ != 0) {
391     // The first frame with a valid rtp timestamp.
392     capture_start_rtp_time_stamp_ = audio_frame->timestamp_;
393   }
394 
395   if (capture_start_rtp_time_stamp_ >= 0) {
396     // audio_frame.timestamp_ should be valid from now on.
397 
398     // Compute elapsed time.
399     int64_t unwrap_timestamp =
400         rtp_ts_wraparound_handler_->Unwrap(audio_frame->timestamp_);
401     audio_frame->elapsed_time_ms_ =
402         (unwrap_timestamp - capture_start_rtp_time_stamp_) /
403         (GetRtpTimestampRateHz() / 1000);
404 
405     {
406       MutexLock lock(&ts_stats_lock_);
407       // Compute ntp time.
408       audio_frame->ntp_time_ms_ =
409           ntp_estimator_.Estimate(audio_frame->timestamp_);
410       // |ntp_time_ms_| won't be valid until at least 2 RTCP SRs are received.
411       if (audio_frame->ntp_time_ms_ > 0) {
412         // Compute |capture_start_ntp_time_ms_| so that
413         // |capture_start_ntp_time_ms_| + |elapsed_time_ms_| == |ntp_time_ms_|
414         capture_start_ntp_time_ms_ =
415             audio_frame->ntp_time_ms_ - audio_frame->elapsed_time_ms_;
416       }
417     }
418   }
419 
420   {
421     RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.TargetJitterBufferDelayMs",
422                               acm_receiver_.TargetDelayMs());
423     const int jitter_buffer_delay = acm_receiver_.FilteredCurrentDelayMs();
424     MutexLock lock(&video_sync_lock_);
425     RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverDelayEstimateMs",
426                               jitter_buffer_delay + playout_delay_ms_);
427     RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverJitterBufferDelayMs",
428                               jitter_buffer_delay);
429     RTC_HISTOGRAM_COUNTS_1000("WebRTC.Audio.ReceiverDeviceDelayMs",
430                               playout_delay_ms_);
431   }
432 
433   return muted ? AudioMixer::Source::AudioFrameInfo::kMuted
434                : AudioMixer::Source::AudioFrameInfo::kNormal;
435 }
436 
PreferredSampleRate() const437 int ChannelReceive::PreferredSampleRate() const {
438   RTC_DCHECK_RUNS_SERIALIZED(&audio_thread_race_checker_);
439   // Return the bigger of playout and receive frequency in the ACM.
440   return std::max(acm_receiver_.last_packet_sample_rate_hz().value_or(0),
441                   acm_receiver_.last_output_sample_rate_hz());
442 }
443 
ChannelReceive(Clock * clock,ProcessThread * module_process_thread,NetEqFactory * neteq_factory,AudioDeviceModule * audio_device_module,Transport * rtcp_send_transport,RtcEventLog * rtc_event_log,uint32_t local_ssrc,uint32_t remote_ssrc,size_t jitter_buffer_max_packets,bool jitter_buffer_fast_playout,int jitter_buffer_min_delay_ms,bool jitter_buffer_enable_rtx_handling,rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,absl::optional<AudioCodecPairId> codec_pair_id,rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,const webrtc::CryptoOptions & crypto_options,rtc::scoped_refptr<FrameTransformerInterface> frame_transformer)444 ChannelReceive::ChannelReceive(
445     Clock* clock,
446     ProcessThread* module_process_thread,
447     NetEqFactory* neteq_factory,
448     AudioDeviceModule* audio_device_module,
449     Transport* rtcp_send_transport,
450     RtcEventLog* rtc_event_log,
451     uint32_t local_ssrc,
452     uint32_t remote_ssrc,
453     size_t jitter_buffer_max_packets,
454     bool jitter_buffer_fast_playout,
455     int jitter_buffer_min_delay_ms,
456     bool jitter_buffer_enable_rtx_handling,
457     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
458     absl::optional<AudioCodecPairId> codec_pair_id,
459     rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
460     const webrtc::CryptoOptions& crypto_options,
461     rtc::scoped_refptr<FrameTransformerInterface> frame_transformer)
462     : event_log_(rtc_event_log),
463       rtp_receive_statistics_(ReceiveStatistics::Create(clock)),
464       remote_ssrc_(remote_ssrc),
465       acm_receiver_(AcmConfig(neteq_factory,
466                               decoder_factory,
467                               codec_pair_id,
468                               jitter_buffer_max_packets,
469                               jitter_buffer_fast_playout)),
470       _outputAudioLevel(),
471       ntp_estimator_(clock),
472       playout_timestamp_rtp_(0),
473       playout_delay_ms_(0),
474       rtp_ts_wraparound_handler_(new rtc::TimestampWrapAroundHandler()),
475       capture_start_rtp_time_stamp_(-1),
476       capture_start_ntp_time_ms_(-1),
477       _moduleProcessThreadPtr(module_process_thread),
478       _audioDeviceModulePtr(audio_device_module),
479       _outputGain(1.0f),
480       associated_send_channel_(nullptr),
481       frame_decryptor_(frame_decryptor),
482       crypto_options_(crypto_options),
483       absolute_capture_time_receiver_(clock) {
484   // TODO(nisse): Use _moduleProcessThreadPtr instead?
485   module_process_thread_checker_.Detach();
486 
487   RTC_DCHECK(module_process_thread);
488   RTC_DCHECK(audio_device_module);
489 
490   acm_receiver_.ResetInitialDelay();
491   acm_receiver_.SetMinimumDelay(0);
492   acm_receiver_.SetMaximumDelay(0);
493   acm_receiver_.FlushBuffers();
494 
495   _outputAudioLevel.ResetLevelFullRange();
496 
497   rtp_receive_statistics_->EnableRetransmitDetection(remote_ssrc_, true);
498   RtpRtcpInterface::Configuration configuration;
499   configuration.clock = clock;
500   configuration.audio = true;
501   configuration.receiver_only = true;
502   configuration.outgoing_transport = rtcp_send_transport;
503   configuration.receive_statistics = rtp_receive_statistics_.get();
504   configuration.event_log = event_log_;
505   configuration.local_media_ssrc = local_ssrc;
506 
507   if (frame_transformer)
508     InitFrameTransformerDelegate(std::move(frame_transformer));
509 
510   rtp_rtcp_ = ModuleRtpRtcpImpl2::Create(configuration);
511   rtp_rtcp_->SetSendingMediaStatus(false);
512   rtp_rtcp_->SetRemoteSSRC(remote_ssrc_);
513 
514   _moduleProcessThreadPtr->RegisterModule(rtp_rtcp_.get(), RTC_FROM_HERE);
515 
516   // Ensure that RTCP is enabled for the created channel.
517   rtp_rtcp_->SetRTCPStatus(RtcpMode::kCompound);
518 }
519 
~ChannelReceive()520 ChannelReceive::~ChannelReceive() {
521   RTC_DCHECK(construction_thread_.IsCurrent());
522 
523   // Resets the delegate's callback to ChannelReceive::OnReceivedPayloadData.
524   if (frame_transformer_delegate_)
525     frame_transformer_delegate_->Reset();
526 
527   StopPlayout();
528 
529   if (_moduleProcessThreadPtr)
530     _moduleProcessThreadPtr->DeRegisterModule(rtp_rtcp_.get());
531 }
532 
SetSink(AudioSinkInterface * sink)533 void ChannelReceive::SetSink(AudioSinkInterface* sink) {
534   RTC_DCHECK(worker_thread_checker_.IsCurrent());
535   MutexLock lock(&callback_mutex_);
536   audio_sink_ = sink;
537 }
538 
StartPlayout()539 void ChannelReceive::StartPlayout() {
540   RTC_DCHECK(worker_thread_checker_.IsCurrent());
541   MutexLock lock(&playing_lock_);
542   playing_ = true;
543 }
544 
StopPlayout()545 void ChannelReceive::StopPlayout() {
546   RTC_DCHECK(worker_thread_checker_.IsCurrent());
547   MutexLock lock(&playing_lock_);
548   playing_ = false;
549   _outputAudioLevel.ResetLevelFullRange();
550 }
551 
GetReceiveCodec() const552 absl::optional<std::pair<int, SdpAudioFormat>> ChannelReceive::GetReceiveCodec()
553     const {
554   RTC_DCHECK(worker_thread_checker_.IsCurrent());
555   return acm_receiver_.LastDecoder();
556 }
557 
SetReceiveCodecs(const std::map<int,SdpAudioFormat> & codecs)558 void ChannelReceive::SetReceiveCodecs(
559     const std::map<int, SdpAudioFormat>& codecs) {
560   RTC_DCHECK(worker_thread_checker_.IsCurrent());
561   for (const auto& kv : codecs) {
562     RTC_DCHECK_GE(kv.second.clockrate_hz, 1000);
563     payload_type_frequencies_[kv.first] = kv.second.clockrate_hz;
564   }
565   acm_receiver_.SetCodecs(codecs);
566 }
567 
568 // May be called on either worker thread or network thread.
OnRtpPacket(const RtpPacketReceived & packet)569 void ChannelReceive::OnRtpPacket(const RtpPacketReceived& packet) {
570   int64_t now_ms = rtc::TimeMillis();
571 
572   {
573     MutexLock lock(&sync_info_lock_);
574     last_received_rtp_timestamp_ = packet.Timestamp();
575     last_received_rtp_system_time_ms_ = now_ms;
576   }
577 
578   // Store playout timestamp for the received RTP packet
579   UpdatePlayoutTimestamp(false, now_ms);
580 
581   const auto& it = payload_type_frequencies_.find(packet.PayloadType());
582   if (it == payload_type_frequencies_.end())
583     return;
584   // TODO(nisse): Set payload_type_frequency earlier, when packet is parsed.
585   RtpPacketReceived packet_copy(packet);
586   packet_copy.set_payload_type_frequency(it->second);
587 
588   rtp_receive_statistics_->OnRtpPacket(packet_copy);
589 
590   RTPHeader header;
591   packet_copy.GetHeader(&header);
592 
593   // Interpolates absolute capture timestamp RTP header extension.
594   header.extension.absolute_capture_time =
595       absolute_capture_time_receiver_.OnReceivePacket(
596           AbsoluteCaptureTimeReceiver::GetSource(header.ssrc,
597                                                  header.arrOfCSRCs),
598           header.timestamp,
599           rtc::saturated_cast<uint32_t>(packet_copy.payload_type_frequency()),
600           header.extension.absolute_capture_time);
601 
602   ReceivePacket(packet_copy.data(), packet_copy.size(), header);
603 }
604 
ReceivePacket(const uint8_t * packet,size_t packet_length,const RTPHeader & header)605 void ChannelReceive::ReceivePacket(const uint8_t* packet,
606                                    size_t packet_length,
607                                    const RTPHeader& header) {
608   const uint8_t* payload = packet + header.headerLength;
609   assert(packet_length >= header.headerLength);
610   size_t payload_length = packet_length - header.headerLength;
611 
612   size_t payload_data_length = payload_length - header.paddingLength;
613 
614   // E2EE Custom Audio Frame Decryption (This is optional).
615   // Keep this buffer around for the lifetime of the OnReceivedPayloadData call.
616   rtc::Buffer decrypted_audio_payload;
617   if (frame_decryptor_ != nullptr) {
618     const size_t max_plaintext_size = frame_decryptor_->GetMaxPlaintextByteSize(
619         cricket::MEDIA_TYPE_AUDIO, payload_length);
620     decrypted_audio_payload.SetSize(max_plaintext_size);
621 
622     const std::vector<uint32_t> csrcs(header.arrOfCSRCs,
623                                       header.arrOfCSRCs + header.numCSRCs);
624     const FrameDecryptorInterface::Result decrypt_result =
625         frame_decryptor_->Decrypt(
626             cricket::MEDIA_TYPE_AUDIO, csrcs,
627             /*additional_data=*/nullptr,
628             rtc::ArrayView<const uint8_t>(payload, payload_data_length),
629             decrypted_audio_payload);
630 
631     if (decrypt_result.IsOk()) {
632       decrypted_audio_payload.SetSize(decrypt_result.bytes_written);
633     } else {
634       // Interpret failures as a silent frame.
635       decrypted_audio_payload.SetSize(0);
636     }
637 
638     payload = decrypted_audio_payload.data();
639     payload_data_length = decrypted_audio_payload.size();
640   } else if (crypto_options_.sframe.require_frame_encryption) {
641     RTC_DLOG(LS_ERROR)
642         << "FrameDecryptor required but not set, dropping packet";
643     payload_data_length = 0;
644   }
645 
646   rtc::ArrayView<const uint8_t> payload_data(payload, payload_data_length);
647   if (frame_transformer_delegate_) {
648     // Asynchronously transform the received payload. After the payload is
649     // transformed, the delegate will call OnReceivedPayloadData to handle it.
650     frame_transformer_delegate_->Transform(payload_data, header, remote_ssrc_);
651   } else {
652     OnReceivedPayloadData(payload_data, header);
653   }
654 }
655 
656 // May be called on either worker thread or network thread.
ReceivedRTCPPacket(const uint8_t * data,size_t length)657 void ChannelReceive::ReceivedRTCPPacket(const uint8_t* data, size_t length) {
658   // Store playout timestamp for the received RTCP packet
659   UpdatePlayoutTimestamp(true, rtc::TimeMillis());
660 
661   // Deliver RTCP packet to RTP/RTCP module for parsing
662   rtp_rtcp_->IncomingRtcpPacket(data, length);
663 
664   int64_t rtt = GetRTT();
665   if (rtt == 0) {
666     // Waiting for valid RTT.
667     return;
668   }
669 
670   uint32_t ntp_secs = 0;
671   uint32_t ntp_frac = 0;
672   uint32_t rtp_timestamp = 0;
673   if (0 !=
674       rtp_rtcp_->RemoteNTP(&ntp_secs, &ntp_frac, NULL, NULL, &rtp_timestamp)) {
675     // Waiting for RTCP.
676     return;
677   }
678 
679   {
680     MutexLock lock(&ts_stats_lock_);
681     ntp_estimator_.UpdateRtcpTimestamp(rtt, ntp_secs, ntp_frac, rtp_timestamp);
682   }
683 }
684 
GetSpeechOutputLevelFullRange() const685 int ChannelReceive::GetSpeechOutputLevelFullRange() const {
686   RTC_DCHECK(worker_thread_checker_.IsCurrent());
687   return _outputAudioLevel.LevelFullRange();
688 }
689 
GetTotalOutputEnergy() const690 double ChannelReceive::GetTotalOutputEnergy() const {
691   RTC_DCHECK(worker_thread_checker_.IsCurrent());
692   return _outputAudioLevel.TotalEnergy();
693 }
694 
GetTotalOutputDuration() const695 double ChannelReceive::GetTotalOutputDuration() const {
696   RTC_DCHECK(worker_thread_checker_.IsCurrent());
697   return _outputAudioLevel.TotalDuration();
698 }
699 
SetChannelOutputVolumeScaling(float scaling)700 void ChannelReceive::SetChannelOutputVolumeScaling(float scaling) {
701   RTC_DCHECK(worker_thread_checker_.IsCurrent());
702   MutexLock lock(&volume_settings_mutex_);
703   _outputGain = scaling;
704 }
705 
RegisterReceiverCongestionControlObjects(PacketRouter * packet_router)706 void ChannelReceive::RegisterReceiverCongestionControlObjects(
707     PacketRouter* packet_router) {
708   RTC_DCHECK(worker_thread_checker_.IsCurrent());
709   RTC_DCHECK(packet_router);
710   RTC_DCHECK(!packet_router_);
711   constexpr bool remb_candidate = false;
712   packet_router->AddReceiveRtpModule(rtp_rtcp_.get(), remb_candidate);
713   packet_router_ = packet_router;
714 }
715 
ResetReceiverCongestionControlObjects()716 void ChannelReceive::ResetReceiverCongestionControlObjects() {
717   RTC_DCHECK(worker_thread_checker_.IsCurrent());
718   RTC_DCHECK(packet_router_);
719   packet_router_->RemoveReceiveRtpModule(rtp_rtcp_.get());
720   packet_router_ = nullptr;
721 }
722 
GetRTCPStatistics() const723 CallReceiveStatistics ChannelReceive::GetRTCPStatistics() const {
724   RTC_DCHECK(worker_thread_checker_.IsCurrent());
725   // --- RtcpStatistics
726   CallReceiveStatistics stats;
727 
728   // The jitter statistics is updated for each received RTP packet and is
729   // based on received packets.
730   RtpReceiveStats rtp_stats;
731   StreamStatistician* statistician =
732       rtp_receive_statistics_->GetStatistician(remote_ssrc_);
733   if (statistician) {
734     rtp_stats = statistician->GetStats();
735   }
736 
737   stats.cumulativeLost = rtp_stats.packets_lost;
738   stats.jitterSamples = rtp_stats.jitter;
739 
740   // --- RTT
741   stats.rttMs = GetRTT();
742 
743   // --- Data counters
744   if (statistician) {
745     stats.payload_bytes_rcvd = rtp_stats.packet_counter.payload_bytes;
746 
747     stats.header_and_padding_bytes_rcvd =
748         rtp_stats.packet_counter.header_bytes +
749         rtp_stats.packet_counter.padding_bytes;
750     stats.packetsReceived = rtp_stats.packet_counter.packets;
751     stats.last_packet_received_timestamp_ms =
752         rtp_stats.last_packet_received_timestamp_ms;
753   } else {
754     stats.payload_bytes_rcvd = 0;
755     stats.header_and_padding_bytes_rcvd = 0;
756     stats.packetsReceived = 0;
757     stats.last_packet_received_timestamp_ms = absl::nullopt;
758   }
759 
760   // --- Timestamps
761   {
762     MutexLock lock(&ts_stats_lock_);
763     stats.capture_start_ntp_time_ms_ = capture_start_ntp_time_ms_;
764   }
765   return stats;
766 }
767 
SetNACKStatus(bool enable,int max_packets)768 void ChannelReceive::SetNACKStatus(bool enable, int max_packets) {
769   RTC_DCHECK(worker_thread_checker_.IsCurrent());
770   // None of these functions can fail.
771   if (enable) {
772     rtp_receive_statistics_->SetMaxReorderingThreshold(max_packets);
773     acm_receiver_.EnableNack(max_packets);
774   } else {
775     rtp_receive_statistics_->SetMaxReorderingThreshold(
776         kDefaultMaxReorderingThreshold);
777     acm_receiver_.DisableNack();
778   }
779 }
780 
781 // Called when we are missing one or more packets.
ResendPackets(const uint16_t * sequence_numbers,int length)782 int ChannelReceive::ResendPackets(const uint16_t* sequence_numbers,
783                                   int length) {
784   return rtp_rtcp_->SendNACK(sequence_numbers, length);
785 }
786 
SetAssociatedSendChannel(const ChannelSendInterface * channel)787 void ChannelReceive::SetAssociatedSendChannel(
788     const ChannelSendInterface* channel) {
789   RTC_DCHECK(worker_thread_checker_.IsCurrent());
790   MutexLock lock(&assoc_send_channel_lock_);
791   associated_send_channel_ = channel;
792 }
793 
SetDepacketizerToDecoderFrameTransformer(rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer)794 void ChannelReceive::SetDepacketizerToDecoderFrameTransformer(
795     rtc::scoped_refptr<webrtc::FrameTransformerInterface> frame_transformer) {
796   RTC_DCHECK(worker_thread_checker_.IsCurrent());
797   // Depending on when the channel is created, the transformer might be set
798   // twice. Don't replace the delegate if it was already initialized.
799   if (!frame_transformer || frame_transformer_delegate_)
800     return;
801   InitFrameTransformerDelegate(std::move(frame_transformer));
802 }
803 
GetNetworkStatistics() const804 NetworkStatistics ChannelReceive::GetNetworkStatistics() const {
805   RTC_DCHECK(worker_thread_checker_.IsCurrent());
806   NetworkStatistics stats;
807   acm_receiver_.GetNetworkStatistics(&stats);
808   return stats;
809 }
810 
GetDecodingCallStatistics() const811 AudioDecodingCallStats ChannelReceive::GetDecodingCallStatistics() const {
812   RTC_DCHECK(worker_thread_checker_.IsCurrent());
813   AudioDecodingCallStats stats;
814   acm_receiver_.GetDecodingCallStatistics(&stats);
815   return stats;
816 }
817 
GetDelayEstimate() const818 uint32_t ChannelReceive::GetDelayEstimate() const {
819   RTC_DCHECK(worker_thread_checker_.IsCurrent() ||
820              module_process_thread_checker_.IsCurrent());
821   MutexLock lock(&video_sync_lock_);
822   return acm_receiver_.FilteredCurrentDelayMs() + playout_delay_ms_;
823 }
824 
SetMinimumPlayoutDelay(int delay_ms)825 void ChannelReceive::SetMinimumPlayoutDelay(int delay_ms) {
826   RTC_DCHECK(module_process_thread_checker_.IsCurrent());
827   // Limit to range accepted by both VoE and ACM, so we're at least getting as
828   // close as possible, instead of failing.
829   delay_ms = rtc::SafeClamp(delay_ms, kVoiceEngineMinMinPlayoutDelayMs,
830                             kVoiceEngineMaxMinPlayoutDelayMs);
831   if (acm_receiver_.SetMinimumDelay(delay_ms) != 0) {
832     RTC_DLOG(LS_ERROR)
833         << "SetMinimumPlayoutDelay() failed to set min playout delay";
834   }
835 }
836 
GetPlayoutRtpTimestamp(uint32_t * rtp_timestamp,int64_t * time_ms) const837 bool ChannelReceive::GetPlayoutRtpTimestamp(uint32_t* rtp_timestamp,
838                                             int64_t* time_ms) const {
839   RTC_DCHECK_RUNS_SERIALIZED(&video_capture_thread_race_checker_);
840   {
841     MutexLock lock(&video_sync_lock_);
842     if (!playout_timestamp_rtp_time_ms_)
843       return false;
844     *rtp_timestamp = playout_timestamp_rtp_;
845     *time_ms = playout_timestamp_rtp_time_ms_.value();
846     return true;
847   }
848 }
849 
SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms,int64_t time_ms)850 void ChannelReceive::SetEstimatedPlayoutNtpTimestampMs(int64_t ntp_timestamp_ms,
851                                                        int64_t time_ms) {
852   RTC_DCHECK_RUNS_SERIALIZED(&video_capture_thread_race_checker_);
853   MutexLock lock(&video_sync_lock_);
854   playout_timestamp_ntp_ = ntp_timestamp_ms;
855   playout_timestamp_ntp_time_ms_ = time_ms;
856 }
857 
858 absl::optional<int64_t>
GetCurrentEstimatedPlayoutNtpTimestampMs(int64_t now_ms) const859 ChannelReceive::GetCurrentEstimatedPlayoutNtpTimestampMs(int64_t now_ms) const {
860   RTC_DCHECK(worker_thread_checker_.IsCurrent());
861   MutexLock lock(&video_sync_lock_);
862   if (!playout_timestamp_ntp_ || !playout_timestamp_ntp_time_ms_)
863     return absl::nullopt;
864 
865   int64_t elapsed_ms = now_ms - *playout_timestamp_ntp_time_ms_;
866   return *playout_timestamp_ntp_ + elapsed_ms;
867 }
868 
SetBaseMinimumPlayoutDelayMs(int delay_ms)869 bool ChannelReceive::SetBaseMinimumPlayoutDelayMs(int delay_ms) {
870   return acm_receiver_.SetBaseMinimumDelayMs(delay_ms);
871 }
872 
GetBaseMinimumPlayoutDelayMs() const873 int ChannelReceive::GetBaseMinimumPlayoutDelayMs() const {
874   return acm_receiver_.GetBaseMinimumDelayMs();
875 }
876 
GetSyncInfo() const877 absl::optional<Syncable::Info> ChannelReceive::GetSyncInfo() const {
878   RTC_DCHECK(module_process_thread_checker_.IsCurrent());
879   Syncable::Info info;
880   if (rtp_rtcp_->RemoteNTP(&info.capture_time_ntp_secs,
881                            &info.capture_time_ntp_frac, nullptr, nullptr,
882                            &info.capture_time_source_clock) != 0) {
883     return absl::nullopt;
884   }
885   {
886     MutexLock lock(&sync_info_lock_);
887     if (!last_received_rtp_timestamp_ || !last_received_rtp_system_time_ms_) {
888       return absl::nullopt;
889     }
890     info.latest_received_capture_timestamp = *last_received_rtp_timestamp_;
891     info.latest_receive_time_ms = *last_received_rtp_system_time_ms_;
892   }
893   return info;
894 }
895 
UpdatePlayoutTimestamp(bool rtcp,int64_t now_ms)896 void ChannelReceive::UpdatePlayoutTimestamp(bool rtcp, int64_t now_ms) {
897   jitter_buffer_playout_timestamp_ = acm_receiver_.GetPlayoutTimestamp();
898 
899   if (!jitter_buffer_playout_timestamp_) {
900     // This can happen if this channel has not received any RTP packets. In
901     // this case, NetEq is not capable of computing a playout timestamp.
902     return;
903   }
904 
905   uint16_t delay_ms = 0;
906   if (_audioDeviceModulePtr->PlayoutDelay(&delay_ms) == -1) {
907     RTC_DLOG(LS_WARNING)
908         << "ChannelReceive::UpdatePlayoutTimestamp() failed to read"
909            " playout delay from the ADM";
910     return;
911   }
912 
913   RTC_DCHECK(jitter_buffer_playout_timestamp_);
914   uint32_t playout_timestamp = *jitter_buffer_playout_timestamp_;
915 
916   // Remove the playout delay.
917   playout_timestamp -= (delay_ms * (GetRtpTimestampRateHz() / 1000));
918 
919   {
920     MutexLock lock(&video_sync_lock_);
921     if (!rtcp && playout_timestamp != playout_timestamp_rtp_) {
922       playout_timestamp_rtp_ = playout_timestamp;
923       playout_timestamp_rtp_time_ms_ = now_ms;
924     }
925     playout_delay_ms_ = delay_ms;
926   }
927 }
928 
GetRtpTimestampRateHz() const929 int ChannelReceive::GetRtpTimestampRateHz() const {
930   const auto decoder = acm_receiver_.LastDecoder();
931   // Default to the playout frequency if we've not gotten any packets yet.
932   // TODO(ossu): Zero clockrate can only happen if we've added an external
933   // decoder for a format we don't support internally. Remove once that way of
934   // adding decoders is gone!
935   // TODO(kwiberg): `decoder->second.clockrate_hz` is an RTP clockrate as it
936   // should, but `acm_receiver_.last_output_sample_rate_hz()` is a codec sample
937   // rate, which is not always the same thing.
938   return (decoder && decoder->second.clockrate_hz != 0)
939              ? decoder->second.clockrate_hz
940              : acm_receiver_.last_output_sample_rate_hz();
941 }
942 
GetRTT() const943 int64_t ChannelReceive::GetRTT() const {
944   std::vector<RTCPReportBlock> report_blocks;
945   rtp_rtcp_->RemoteRTCPStat(&report_blocks);
946 
947   // TODO(nisse): Could we check the return value from the ->RTT() call below,
948   // instead of checking if we have any report blocks?
949   if (report_blocks.empty()) {
950     MutexLock lock(&assoc_send_channel_lock_);
951     // Tries to get RTT from an associated channel.
952     if (!associated_send_channel_) {
953       return 0;
954     }
955     return associated_send_channel_->GetRTT();
956   }
957 
958   int64_t rtt = 0;
959   int64_t avg_rtt = 0;
960   int64_t max_rtt = 0;
961   int64_t min_rtt = 0;
962   // TODO(nisse): This method computes RTT based on sender reports, even though
963   // a receive stream is not supposed to do that.
964   if (rtp_rtcp_->RTT(remote_ssrc_, &rtt, &avg_rtt, &min_rtt, &max_rtt) != 0) {
965     return 0;
966   }
967   return rtt;
968 }
969 
970 }  // namespace
971 
CreateChannelReceive(Clock * clock,ProcessThread * module_process_thread,NetEqFactory * neteq_factory,AudioDeviceModule * audio_device_module,Transport * rtcp_send_transport,RtcEventLog * rtc_event_log,uint32_t local_ssrc,uint32_t remote_ssrc,size_t jitter_buffer_max_packets,bool jitter_buffer_fast_playout,int jitter_buffer_min_delay_ms,bool jitter_buffer_enable_rtx_handling,rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,absl::optional<AudioCodecPairId> codec_pair_id,rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,const webrtc::CryptoOptions & crypto_options,rtc::scoped_refptr<FrameTransformerInterface> frame_transformer)972 std::unique_ptr<ChannelReceiveInterface> CreateChannelReceive(
973     Clock* clock,
974     ProcessThread* module_process_thread,
975     NetEqFactory* neteq_factory,
976     AudioDeviceModule* audio_device_module,
977     Transport* rtcp_send_transport,
978     RtcEventLog* rtc_event_log,
979     uint32_t local_ssrc,
980     uint32_t remote_ssrc,
981     size_t jitter_buffer_max_packets,
982     bool jitter_buffer_fast_playout,
983     int jitter_buffer_min_delay_ms,
984     bool jitter_buffer_enable_rtx_handling,
985     rtc::scoped_refptr<AudioDecoderFactory> decoder_factory,
986     absl::optional<AudioCodecPairId> codec_pair_id,
987     rtc::scoped_refptr<FrameDecryptorInterface> frame_decryptor,
988     const webrtc::CryptoOptions& crypto_options,
989     rtc::scoped_refptr<FrameTransformerInterface> frame_transformer) {
990   return std::make_unique<ChannelReceive>(
991       clock, module_process_thread, neteq_factory, audio_device_module,
992       rtcp_send_transport, rtc_event_log, local_ssrc, remote_ssrc,
993       jitter_buffer_max_packets, jitter_buffer_fast_playout,
994       jitter_buffer_min_delay_ms, jitter_buffer_enable_rtx_handling,
995       decoder_factory, codec_pair_id, frame_decryptor, crypto_options,
996       std::move(frame_transformer));
997 }
998 
999 }  // namespace voe
1000 }  // namespace webrtc
1001