1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_coding/acm2/acm_receiver.h"
12 
13 #include <stdlib.h>  // malloc
14 
15 #include <algorithm>  // sort
16 #include <vector>
17 
18 #include "webrtc/base/checks.h"
19 #include "webrtc/base/format_macros.h"
20 #include "webrtc/base/logging.h"
21 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
22 #include "webrtc/common_types.h"
23 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
24 #include "webrtc/modules/audio_coding/acm2/acm_resampler.h"
25 #include "webrtc/modules/audio_coding/acm2/call_statistics.h"
26 #include "webrtc/modules/audio_coding/neteq/include/neteq.h"
27 #include "webrtc/system_wrappers/include/clock.h"
28 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
29 #include "webrtc/system_wrappers/include/tick_util.h"
30 #include "webrtc/system_wrappers/include/trace.h"
31 
32 namespace webrtc {
33 
34 namespace acm2 {
35 
36 namespace {
37 
38 // |vad_activity_| field of |audio_frame| is set to |previous_audio_activity_|
39 // before the call to this function.
SetAudioFrameActivityAndType(bool vad_enabled,NetEqOutputType type,AudioFrame * audio_frame)40 void SetAudioFrameActivityAndType(bool vad_enabled,
41                                   NetEqOutputType type,
42                                   AudioFrame* audio_frame) {
43   if (vad_enabled) {
44     switch (type) {
45       case kOutputNormal: {
46         audio_frame->vad_activity_ = AudioFrame::kVadActive;
47         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
48         break;
49       }
50       case kOutputVADPassive: {
51         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
52         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
53         break;
54       }
55       case kOutputCNG: {
56         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
57         audio_frame->speech_type_ = AudioFrame::kCNG;
58         break;
59       }
60       case kOutputPLC: {
61         // Don't change |audio_frame->vad_activity_|, it should be the same as
62         // |previous_audio_activity_|.
63         audio_frame->speech_type_ = AudioFrame::kPLC;
64         break;
65       }
66       case kOutputPLCtoCNG: {
67         audio_frame->vad_activity_ = AudioFrame::kVadPassive;
68         audio_frame->speech_type_ = AudioFrame::kPLCCNG;
69         break;
70       }
71       default:
72         assert(false);
73     }
74   } else {
75     // Always return kVadUnknown when receive VAD is inactive
76     audio_frame->vad_activity_ = AudioFrame::kVadUnknown;
77     switch (type) {
78       case kOutputNormal: {
79         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
80         break;
81       }
82       case kOutputCNG: {
83         audio_frame->speech_type_ = AudioFrame::kCNG;
84         break;
85       }
86       case kOutputPLC: {
87         audio_frame->speech_type_ = AudioFrame::kPLC;
88         break;
89       }
90       case kOutputPLCtoCNG: {
91         audio_frame->speech_type_ = AudioFrame::kPLCCNG;
92         break;
93       }
94       case kOutputVADPassive: {
95         // Normally, we should no get any VAD decision if post-decoding VAD is
96         // not active. However, if post-decoding VAD has been active then
97         // disabled, we might be here for couple of frames.
98         audio_frame->speech_type_ = AudioFrame::kNormalSpeech;
99         LOG(WARNING) << "Post-decoding VAD is disabled but output is "
100             << "labeled VAD-passive";
101         break;
102       }
103       default:
104         assert(false);
105     }
106   }
107 }
108 
109 // Is the given codec a CNG codec?
110 // TODO(kwiberg): Move to RentACodec.
IsCng(int codec_id)111 bool IsCng(int codec_id) {
112   auto i = RentACodec::CodecIdFromIndex(codec_id);
113   return (i && (*i == RentACodec::CodecId::kCNNB ||
114                 *i == RentACodec::CodecId::kCNWB ||
115                 *i == RentACodec::CodecId::kCNSWB ||
116                 *i == RentACodec::CodecId::kCNFB));
117 }
118 
119 }  // namespace
120 
AcmReceiver(const AudioCodingModule::Config & config)121 AcmReceiver::AcmReceiver(const AudioCodingModule::Config& config)
122     : crit_sect_(CriticalSectionWrapper::CreateCriticalSection()),
123       id_(config.id),
124       last_audio_decoder_(nullptr),
125       previous_audio_activity_(AudioFrame::kVadPassive),
126       audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
127       last_audio_buffer_(new int16_t[AudioFrame::kMaxDataSizeSamples]),
128       neteq_(NetEq::Create(config.neteq_config)),
129       vad_enabled_(config.neteq_config.enable_post_decode_vad),
130       clock_(config.clock),
131       resampled_last_output_frame_(true) {
132   assert(clock_);
133   memset(audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
134   memset(last_audio_buffer_.get(), 0, AudioFrame::kMaxDataSizeSamples);
135 }
136 
~AcmReceiver()137 AcmReceiver::~AcmReceiver() {
138   delete neteq_;
139 }
140 
SetMinimumDelay(int delay_ms)141 int AcmReceiver::SetMinimumDelay(int delay_ms) {
142   if (neteq_->SetMinimumDelay(delay_ms))
143     return 0;
144   LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
145   return -1;
146 }
147 
SetMaximumDelay(int delay_ms)148 int AcmReceiver::SetMaximumDelay(int delay_ms) {
149   if (neteq_->SetMaximumDelay(delay_ms))
150     return 0;
151   LOG(LERROR) << "AcmReceiver::SetExtraDelay " << delay_ms;
152   return -1;
153 }
154 
LeastRequiredDelayMs() const155 int AcmReceiver::LeastRequiredDelayMs() const {
156   return neteq_->LeastRequiredDelayMs();
157 }
158 
last_packet_sample_rate_hz() const159 rtc::Optional<int> AcmReceiver::last_packet_sample_rate_hz() const {
160   CriticalSectionScoped lock(crit_sect_.get());
161   return last_packet_sample_rate_hz_;
162 }
163 
last_output_sample_rate_hz() const164 int AcmReceiver::last_output_sample_rate_hz() const {
165   return neteq_->last_output_sample_rate_hz();
166 }
167 
InsertPacket(const WebRtcRTPHeader & rtp_header,rtc::ArrayView<const uint8_t> incoming_payload)168 int AcmReceiver::InsertPacket(const WebRtcRTPHeader& rtp_header,
169                               rtc::ArrayView<const uint8_t> incoming_payload) {
170   uint32_t receive_timestamp = 0;
171   const RTPHeader* header = &rtp_header.header;  // Just a shorthand.
172 
173   {
174     CriticalSectionScoped lock(crit_sect_.get());
175 
176     const Decoder* decoder = RtpHeaderToDecoder(*header, incoming_payload[0]);
177     if (!decoder) {
178       LOG_F(LS_ERROR) << "Payload-type "
179                       << static_cast<int>(header->payloadType)
180                       << " is not registered.";
181       return -1;
182     }
183     const int sample_rate_hz = [&decoder] {
184       const auto ci = RentACodec::CodecIdFromIndex(decoder->acm_codec_id);
185       return ci ? RentACodec::CodecInstById(*ci)->plfreq : -1;
186     }();
187     receive_timestamp = NowInTimestamp(sample_rate_hz);
188 
189     // If this is a CNG while the audio codec is not mono, skip pushing in
190     // packets into NetEq.
191     if (IsCng(decoder->acm_codec_id) && last_audio_decoder_ &&
192         last_audio_decoder_->channels > 1)
193         return 0;
194     if (!IsCng(decoder->acm_codec_id) &&
195         decoder->acm_codec_id !=
196             *RentACodec::CodecIndexFromId(RentACodec::CodecId::kAVT)) {
197       last_audio_decoder_ = decoder;
198       last_packet_sample_rate_hz_ = rtc::Optional<int>(decoder->sample_rate_hz);
199     }
200 
201   }  // |crit_sect_| is released.
202 
203   if (neteq_->InsertPacket(rtp_header, incoming_payload, receive_timestamp) <
204       0) {
205     LOG(LERROR) << "AcmReceiver::InsertPacket "
206                 << static_cast<int>(header->payloadType)
207                 << " Failed to insert packet";
208     return -1;
209   }
210   return 0;
211 }
212 
GetAudio(int desired_freq_hz,AudioFrame * audio_frame)213 int AcmReceiver::GetAudio(int desired_freq_hz, AudioFrame* audio_frame) {
214   enum NetEqOutputType type;
215   size_t samples_per_channel;
216   size_t num_channels;
217 
218   // Accessing members, take the lock.
219   CriticalSectionScoped lock(crit_sect_.get());
220 
221   // Always write the output to |audio_buffer_| first.
222   if (neteq_->GetAudio(AudioFrame::kMaxDataSizeSamples,
223                        audio_buffer_.get(),
224                        &samples_per_channel,
225                        &num_channels,
226                        &type) != NetEq::kOK) {
227     LOG(LERROR) << "AcmReceiver::GetAudio - NetEq Failed.";
228     return -1;
229   }
230 
231   const int current_sample_rate_hz = neteq_->last_output_sample_rate_hz();
232 
233   // Update if resampling is required.
234   const bool need_resampling =
235       (desired_freq_hz != -1) && (current_sample_rate_hz != desired_freq_hz);
236 
237   if (need_resampling && !resampled_last_output_frame_) {
238     // Prime the resampler with the last frame.
239     int16_t temp_output[AudioFrame::kMaxDataSizeSamples];
240     int samples_per_channel_int = resampler_.Resample10Msec(
241         last_audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
242         num_channels, AudioFrame::kMaxDataSizeSamples, temp_output);
243     if (samples_per_channel_int < 0) {
244       LOG(LERROR) << "AcmReceiver::GetAudio - "
245                      "Resampling last_audio_buffer_ failed.";
246       return -1;
247     }
248     samples_per_channel = static_cast<size_t>(samples_per_channel_int);
249   }
250 
251   // The audio in |audio_buffer_| is tansferred to |audio_frame_| below, either
252   // through resampling, or through straight memcpy.
253   // TODO(henrik.lundin) Glitches in the output may appear if the output rate
254   // from NetEq changes. See WebRTC issue 3923.
255   if (need_resampling) {
256     int samples_per_channel_int = resampler_.Resample10Msec(
257         audio_buffer_.get(), current_sample_rate_hz, desired_freq_hz,
258         num_channels, AudioFrame::kMaxDataSizeSamples, audio_frame->data_);
259     if (samples_per_channel_int < 0) {
260       LOG(LERROR) << "AcmReceiver::GetAudio - Resampling audio_buffer_ failed.";
261       return -1;
262     }
263     samples_per_channel = static_cast<size_t>(samples_per_channel_int);
264     resampled_last_output_frame_ = true;
265   } else {
266     resampled_last_output_frame_ = false;
267     // We might end up here ONLY if codec is changed.
268     memcpy(audio_frame->data_,
269            audio_buffer_.get(),
270            samples_per_channel * num_channels * sizeof(int16_t));
271   }
272 
273   // Swap buffers, so that the current audio is stored in |last_audio_buffer_|
274   // for next time.
275   audio_buffer_.swap(last_audio_buffer_);
276 
277   audio_frame->num_channels_ = num_channels;
278   audio_frame->samples_per_channel_ = samples_per_channel;
279   audio_frame->sample_rate_hz_ = static_cast<int>(samples_per_channel * 100);
280 
281   // Should set |vad_activity| before calling SetAudioFrameActivityAndType().
282   audio_frame->vad_activity_ = previous_audio_activity_;
283   SetAudioFrameActivityAndType(vad_enabled_, type, audio_frame);
284   previous_audio_activity_ = audio_frame->vad_activity_;
285   call_stats_.DecodedByNetEq(audio_frame->speech_type_);
286 
287   // Computes the RTP timestamp of the first sample in |audio_frame| from
288   // |GetPlayoutTimestamp|, which is the timestamp of the last sample of
289   // |audio_frame|.
290   uint32_t playout_timestamp = 0;
291   if (GetPlayoutTimestamp(&playout_timestamp)) {
292     audio_frame->timestamp_ = playout_timestamp -
293         static_cast<uint32_t>(audio_frame->samples_per_channel_);
294   } else {
295     // Remain 0 until we have a valid |playout_timestamp|.
296     audio_frame->timestamp_ = 0;
297   }
298 
299   return 0;
300 }
301 
AddCodec(int acm_codec_id,uint8_t payload_type,size_t channels,int sample_rate_hz,AudioDecoder * audio_decoder,const std::string & name)302 int32_t AcmReceiver::AddCodec(int acm_codec_id,
303                               uint8_t payload_type,
304                               size_t channels,
305                               int sample_rate_hz,
306                               AudioDecoder* audio_decoder,
307                               const std::string& name) {
308   const auto neteq_decoder = [acm_codec_id, channels]() -> NetEqDecoder {
309     if (acm_codec_id == -1)
310       return NetEqDecoder::kDecoderArbitrary;  // External decoder.
311     const rtc::Optional<RentACodec::CodecId> cid =
312         RentACodec::CodecIdFromIndex(acm_codec_id);
313     RTC_DCHECK(cid) << "Invalid codec index: " << acm_codec_id;
314     const rtc::Optional<NetEqDecoder> ned =
315         RentACodec::NetEqDecoderFromCodecId(*cid, channels);
316     RTC_DCHECK(ned) << "Invalid codec ID: " << static_cast<int>(*cid);
317     return *ned;
318   }();
319 
320   CriticalSectionScoped lock(crit_sect_.get());
321 
322   // The corresponding NetEq decoder ID.
323   // If this codec has been registered before.
324   auto it = decoders_.find(payload_type);
325   if (it != decoders_.end()) {
326     const Decoder& decoder = it->second;
327     if (acm_codec_id != -1 && decoder.acm_codec_id == acm_codec_id &&
328         decoder.channels == channels &&
329         decoder.sample_rate_hz == sample_rate_hz) {
330       // Re-registering the same codec. Do nothing and return.
331       return 0;
332     }
333 
334     // Changing codec. First unregister the old codec, then register the new
335     // one.
336     if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
337       LOG(LERROR) << "Cannot remove payload " << static_cast<int>(payload_type);
338       return -1;
339     }
340 
341     decoders_.erase(it);
342   }
343 
344   int ret_val;
345   if (!audio_decoder) {
346     ret_val = neteq_->RegisterPayloadType(neteq_decoder, name, payload_type);
347   } else {
348     ret_val = neteq_->RegisterExternalDecoder(
349         audio_decoder, neteq_decoder, name, payload_type, sample_rate_hz);
350   }
351   if (ret_val != NetEq::kOK) {
352     LOG(LERROR) << "AcmReceiver::AddCodec " << acm_codec_id
353                 << static_cast<int>(payload_type)
354                 << " channels: " << channels;
355     return -1;
356   }
357 
358   Decoder decoder;
359   decoder.acm_codec_id = acm_codec_id;
360   decoder.payload_type = payload_type;
361   decoder.channels = channels;
362   decoder.sample_rate_hz = sample_rate_hz;
363   decoders_[payload_type] = decoder;
364   return 0;
365 }
366 
EnableVad()367 void AcmReceiver::EnableVad() {
368   neteq_->EnableVad();
369   CriticalSectionScoped lock(crit_sect_.get());
370   vad_enabled_ = true;
371 }
372 
DisableVad()373 void AcmReceiver::DisableVad() {
374   neteq_->DisableVad();
375   CriticalSectionScoped lock(crit_sect_.get());
376   vad_enabled_ = false;
377 }
378 
FlushBuffers()379 void AcmReceiver::FlushBuffers() {
380   neteq_->FlushBuffers();
381 }
382 
383 // If failed in removing one of the codecs, this method continues to remove as
384 // many as it can.
RemoveAllCodecs()385 int AcmReceiver::RemoveAllCodecs() {
386   int ret_val = 0;
387   CriticalSectionScoped lock(crit_sect_.get());
388   for (auto it = decoders_.begin(); it != decoders_.end(); ) {
389     auto cur = it;
390     ++it;  // it will be valid even if we erase cur
391     if (neteq_->RemovePayloadType(cur->second.payload_type) == 0) {
392       decoders_.erase(cur);
393     } else {
394       LOG_F(LS_ERROR) << "Cannot remove payload "
395                       << static_cast<int>(cur->second.payload_type);
396       ret_val = -1;
397     }
398   }
399 
400   // No codec is registered, invalidate last audio decoder.
401   last_audio_decoder_ = nullptr;
402   last_packet_sample_rate_hz_ = rtc::Optional<int>();
403   return ret_val;
404 }
405 
RemoveCodec(uint8_t payload_type)406 int AcmReceiver::RemoveCodec(uint8_t payload_type) {
407   CriticalSectionScoped lock(crit_sect_.get());
408   auto it = decoders_.find(payload_type);
409   if (it == decoders_.end()) {  // Such a payload-type is not registered.
410     return 0;
411   }
412   if (neteq_->RemovePayloadType(payload_type) != NetEq::kOK) {
413     LOG(LERROR) << "AcmReceiver::RemoveCodec" << static_cast<int>(payload_type);
414     return -1;
415   }
416   if (last_audio_decoder_ == &it->second) {
417     last_audio_decoder_ = nullptr;
418     last_packet_sample_rate_hz_ = rtc::Optional<int>();
419   }
420   decoders_.erase(it);
421   return 0;
422 }
423 
set_id(int id)424 void AcmReceiver::set_id(int id) {
425   CriticalSectionScoped lock(crit_sect_.get());
426   id_ = id;
427 }
428 
GetPlayoutTimestamp(uint32_t * timestamp)429 bool AcmReceiver::GetPlayoutTimestamp(uint32_t* timestamp) {
430   return neteq_->GetPlayoutTimestamp(timestamp);
431 }
432 
LastAudioCodec(CodecInst * codec) const433 int AcmReceiver::LastAudioCodec(CodecInst* codec) const {
434   CriticalSectionScoped lock(crit_sect_.get());
435   if (!last_audio_decoder_) {
436     return -1;
437   }
438   *codec = *RentACodec::CodecInstById(
439       *RentACodec::CodecIdFromIndex(last_audio_decoder_->acm_codec_id));
440   codec->pltype = last_audio_decoder_->payload_type;
441   codec->channels = last_audio_decoder_->channels;
442   codec->plfreq = last_audio_decoder_->sample_rate_hz;
443   return 0;
444 }
445 
GetNetworkStatistics(NetworkStatistics * acm_stat)446 void AcmReceiver::GetNetworkStatistics(NetworkStatistics* acm_stat) {
447   NetEqNetworkStatistics neteq_stat;
448   // NetEq function always returns zero, so we don't check the return value.
449   neteq_->NetworkStatistics(&neteq_stat);
450 
451   acm_stat->currentBufferSize = neteq_stat.current_buffer_size_ms;
452   acm_stat->preferredBufferSize = neteq_stat.preferred_buffer_size_ms;
453   acm_stat->jitterPeaksFound = neteq_stat.jitter_peaks_found ? true : false;
454   acm_stat->currentPacketLossRate = neteq_stat.packet_loss_rate;
455   acm_stat->currentDiscardRate = neteq_stat.packet_discard_rate;
456   acm_stat->currentExpandRate = neteq_stat.expand_rate;
457   acm_stat->currentSpeechExpandRate = neteq_stat.speech_expand_rate;
458   acm_stat->currentPreemptiveRate = neteq_stat.preemptive_rate;
459   acm_stat->currentAccelerateRate = neteq_stat.accelerate_rate;
460   acm_stat->currentSecondaryDecodedRate = neteq_stat.secondary_decoded_rate;
461   acm_stat->clockDriftPPM = neteq_stat.clockdrift_ppm;
462   acm_stat->addedSamples = neteq_stat.added_zero_samples;
463   acm_stat->meanWaitingTimeMs = neteq_stat.mean_waiting_time_ms;
464   acm_stat->medianWaitingTimeMs = neteq_stat.median_waiting_time_ms;
465   acm_stat->minWaitingTimeMs = neteq_stat.min_waiting_time_ms;
466   acm_stat->maxWaitingTimeMs = neteq_stat.max_waiting_time_ms;
467 }
468 
DecoderByPayloadType(uint8_t payload_type,CodecInst * codec) const469 int AcmReceiver::DecoderByPayloadType(uint8_t payload_type,
470                                       CodecInst* codec) const {
471   CriticalSectionScoped lock(crit_sect_.get());
472   auto it = decoders_.find(payload_type);
473   if (it == decoders_.end()) {
474     LOG(LERROR) << "AcmReceiver::DecoderByPayloadType "
475                 << static_cast<int>(payload_type);
476     return -1;
477   }
478   const Decoder& decoder = it->second;
479   *codec = *RentACodec::CodecInstById(
480       *RentACodec::CodecIdFromIndex(decoder.acm_codec_id));
481   codec->pltype = decoder.payload_type;
482   codec->channels = decoder.channels;
483   codec->plfreq = decoder.sample_rate_hz;
484   return 0;
485 }
486 
EnableNack(size_t max_nack_list_size)487 int AcmReceiver::EnableNack(size_t max_nack_list_size) {
488   neteq_->EnableNack(max_nack_list_size);
489   return 0;
490 }
491 
DisableNack()492 void AcmReceiver::DisableNack() {
493   neteq_->DisableNack();
494 }
495 
GetNackList(int64_t round_trip_time_ms) const496 std::vector<uint16_t> AcmReceiver::GetNackList(
497     int64_t round_trip_time_ms) const {
498   return neteq_->GetNackList(round_trip_time_ms);
499 }
500 
ResetInitialDelay()501 void AcmReceiver::ResetInitialDelay() {
502   neteq_->SetMinimumDelay(0);
503   // TODO(turajs): Should NetEq Buffer be flushed?
504 }
505 
RtpHeaderToDecoder(const RTPHeader & rtp_header,uint8_t payload_type) const506 const AcmReceiver::Decoder* AcmReceiver::RtpHeaderToDecoder(
507     const RTPHeader& rtp_header,
508     uint8_t payload_type) const {
509   auto it = decoders_.find(rtp_header.payloadType);
510   const auto red_index =
511       RentACodec::CodecIndexFromId(RentACodec::CodecId::kRED);
512   if (red_index &&  // This ensures that RED is defined in WebRTC.
513       it != decoders_.end() && it->second.acm_codec_id == *red_index) {
514     // This is a RED packet, get the payload of the audio codec.
515     it = decoders_.find(payload_type & 0x7F);
516   }
517 
518   // Check if the payload is registered.
519   return it != decoders_.end() ? &it->second : nullptr;
520 }
521 
NowInTimestamp(int decoder_sampling_rate) const522 uint32_t AcmReceiver::NowInTimestamp(int decoder_sampling_rate) const {
523   // Down-cast the time to (32-6)-bit since we only care about
524   // the least significant bits. (32-6) bits cover 2^(32-6) = 67108864 ms.
525   // We masked 6 most significant bits of 32-bit so there is no overflow in
526   // the conversion from milliseconds to timestamp.
527   const uint32_t now_in_ms = static_cast<uint32_t>(
528       clock_->TimeInMilliseconds() & 0x03ffffff);
529   return static_cast<uint32_t>(
530       (decoder_sampling_rate / 1000) * now_in_ms);
531 }
532 
GetDecodingCallStatistics(AudioDecodingCallStats * stats) const533 void AcmReceiver::GetDecodingCallStatistics(
534     AudioDecodingCallStats* stats) const {
535   CriticalSectionScoped lock(crit_sect_.get());
536   *stats = call_stats_.GetDecodingStatistics();
537 }
538 
539 }  // namespace acm2
540 
541 }  // namespace webrtc
542