1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/neteq/decision_logic.h"
12 
13 #include <assert.h>
14 #include <stdio.h>
15 
16 #include <string>
17 
18 #include "absl/types/optional.h"
19 #include "modules/audio_coding/neteq/packet_buffer.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/experiments/field_trial_parser.h"
22 #include "rtc_base/logging.h"
23 #include "rtc_base/numerics/safe_conversions.h"
24 #include "system_wrappers/include/field_trial.h"
25 
namespace {

// Percentage of the target level that GetDecision() uses as the buffered-span
// threshold below which it keeps expanding instead of resuming decoding.
constexpr int kPostponeDecodingLevel{50};

// Default value of the "target_level_window" field trial parameter, which
// FuturePacketAvailable() treats as a duration in milliseconds.
constexpr int kDefaultTargetLevelWindowMs{100};

}  // namespace
32 
33 namespace webrtc {
34 
DecisionLogic(NetEqController::Config config)35 DecisionLogic::DecisionLogic(NetEqController::Config config)
36     : delay_manager_(DelayManager::Create(config.max_packets_in_buffer,
37                                           config.base_min_delay_ms,
38                                           config.enable_rtx_handling,
39                                           config.tick_timer)),
40       tick_timer_(config.tick_timer),
41       disallow_time_stretching_(!config.allow_time_stretching),
42       timescale_countdown_(
43           tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
44       estimate_dtx_delay_("estimate_dtx_delay", false),
45       time_stretch_cn_("time_stretch_cn", false),
46       target_level_window_ms_("target_level_window",
47                               kDefaultTargetLevelWindowMs,
48                               0,
49                               absl::nullopt) {
50   const std::string field_trial_name =
51       field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings");
52   ParseFieldTrial(
53       {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_},
54       field_trial_name);
55   RTC_LOG(LS_INFO) << "NetEq decision logic settings:"
56                       " estimate_dtx_delay="
57                    << estimate_dtx_delay_
58                    << " time_stretch_cn=" << time_stretch_cn_
59                    << " target_level_window_ms=" << target_level_window_ms_;
60 }
61 
62 DecisionLogic::~DecisionLogic() = default;
63 
Reset()64 void DecisionLogic::Reset() {
65   cng_state_ = kCngOff;
66   noise_fast_forward_ = 0;
67   packet_length_samples_ = 0;
68   sample_memory_ = 0;
69   prev_time_scale_ = false;
70   timescale_countdown_.reset();
71   num_consecutive_expands_ = 0;
72   time_stretched_cn_samples_ = 0;
73 }
74 
SoftReset()75 void DecisionLogic::SoftReset() {
76   packet_length_samples_ = 0;
77   sample_memory_ = 0;
78   prev_time_scale_ = false;
79   timescale_countdown_ =
80       tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
81   time_stretched_cn_samples_ = 0;
82   delay_manager_->Reset();
83   buffer_level_filter_.Reset();
84 }
85 
SetSampleRate(int fs_hz,size_t output_size_samples)86 void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
87   // TODO(hlundin): Change to an enumerator and skip assert.
88   assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
89   sample_rate_ = fs_hz;
90   output_size_samples_ = output_size_samples;
91 }
92 
GetDecision(const NetEqStatus & status,bool * reset_decoder)93 NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
94                                             bool* reset_decoder) {
95   // If last mode was CNG (or Expand, since this could be covering up for
96   // a lost CNG packet), remember that CNG is on. This is needed if comfort
97   // noise is interrupted by DTMF.
98   if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
99     cng_state_ = kCngRfc3389On;
100   } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
101     cng_state_ = kCngInternalOn;
102   }
103 
104   size_t cur_size_samples = estimate_dtx_delay_
105                                 ? status.packet_buffer_info.span_samples
106                                 : status.packet_buffer_info.num_samples;
107   prev_time_scale_ =
108       prev_time_scale_ &&
109       (status.last_mode == NetEq::Mode::kAccelerateSuccess ||
110        status.last_mode == NetEq::Mode::kAccelerateLowEnergy ||
111        status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess ||
112        status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy);
113 
114   // Do not update buffer history if currently playing CNG since it will bias
115   // the filtered buffer level.
116   if (status.last_mode != NetEq::Mode::kRfc3389Cng &&
117       status.last_mode != NetEq::Mode::kCodecInternalCng &&
118       !(status.next_packet && status.next_packet->is_dtx &&
119         !estimate_dtx_delay_)) {
120     FilterBufferLevel(cur_size_samples);
121   }
122 
123   // Guard for errors, to avoid getting stuck in error mode.
124   if (status.last_mode == NetEq::Mode::kError) {
125     if (!status.next_packet) {
126       return NetEq::Operation::kExpand;
127     } else {
128       // Use kUndefined to flag for a reset.
129       return NetEq::Operation::kUndefined;
130     }
131   }
132 
133   if (status.next_packet && status.next_packet->is_cng) {
134     return CngOperation(status.last_mode, status.target_timestamp,
135                         status.next_packet->timestamp,
136                         status.generated_noise_samples);
137   }
138 
139   // Handle the case with no packet at all available (except maybe DTMF).
140   if (!status.next_packet) {
141     return NoPacket(status.play_dtmf);
142   }
143 
144   // If the expand period was very long, reset NetEQ since it is likely that the
145   // sender was restarted.
146   if (num_consecutive_expands_ > kReinitAfterExpands) {
147     *reset_decoder = true;
148     return NetEq::Operation::kNormal;
149   }
150 
151   // Make sure we don't restart audio too soon after an expansion to avoid
152   // running out of data right away again. We should only wait if there are no
153   // DTX or CNG packets in the buffer (otherwise we should just play out what we
154   // have, since we cannot know the exact duration of DTX or CNG packets), and
155   // if the mute factor is low enough (otherwise the expansion was short enough
156   // to not be noticable).
157   // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
158   const size_t current_span =
159       estimate_dtx_delay_ ? status.packet_buffer_info.span_samples
160                           : status.packet_buffer_info.span_samples_no_dtx;
161   if ((status.last_mode == NetEq::Mode::kExpand ||
162        status.last_mode == NetEq::Mode::kCodecPlc) &&
163       status.expand_mutefactor < 16384 / 2 &&
164       current_span<static_cast<size_t>(delay_manager_->TargetLevel() *
165                                        packet_length_samples_ *
166                                        kPostponeDecodingLevel / 100)>> 8 &&
167       !status.packet_buffer_info.dtx_or_cng) {
168     return NetEq::Operation::kExpand;
169   }
170 
171   const uint32_t five_seconds_samples = static_cast<uint32_t>(5 * sample_rate_);
172   // Check if the required packet is available.
173   if (status.target_timestamp == status.next_packet->timestamp) {
174     return ExpectedPacketAvailable(status.last_mode, status.play_dtmf);
175   } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
176                                                 status.target_timestamp,
177                                                 five_seconds_samples)) {
178     return FuturePacketAvailable(
179         status.last_packet_samples, status.last_mode, status.target_timestamp,
180         status.next_packet->timestamp, status.play_dtmf,
181         status.generated_noise_samples, status.packet_buffer_info.span_samples,
182         status.packet_buffer_info.num_packets);
183   } else {
184     // This implies that available_timestamp < target_timestamp, which can
185     // happen when a new stream or codec is received. Signal for a reset.
186     return NetEq::Operation::kUndefined;
187   }
188 }
189 
ExpandDecision(NetEq::Operation operation)190 void DecisionLogic::ExpandDecision(NetEq::Operation operation) {
191   if (operation == NetEq::Operation::kExpand) {
192     num_consecutive_expands_++;
193   } else {
194     num_consecutive_expands_ = 0;
195   }
196 }
197 
PacketArrived(bool last_cng_or_dtmf,size_t packet_length_samples,bool should_update_stats,uint16_t main_sequence_number,uint32_t main_timestamp,int fs_hz)198 absl::optional<int> DecisionLogic::PacketArrived(bool last_cng_or_dtmf,
199                                                  size_t packet_length_samples,
200                                                  bool should_update_stats,
201                                                  uint16_t main_sequence_number,
202                                                  uint32_t main_timestamp,
203                                                  int fs_hz) {
204   delay_manager_->LastDecodedWasCngOrDtmf(last_cng_or_dtmf);
205   absl::optional<int> relative_delay;
206   if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
207     // Calculate the total speech length carried in each packet.
208     if (packet_length_samples > 0 &&
209         packet_length_samples != packet_length_samples_) {
210       packet_length_samples_ = packet_length_samples;
211       delay_manager_->SetPacketAudioLength(
212           rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz));
213     }
214 
215     // Update statistics.
216     if (should_update_stats) {
217       relative_delay =
218           delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz);
219     }
220   } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
221     // This is first "normal" packet after CNG or DTMF.
222     // Reset packet time counter and measure time until next packet,
223     // but don't update statistics.
224     delay_manager_->set_last_pack_cng_or_dtmf(0);
225     delay_manager_->ResetPacketIatCount();
226   }
227   return relative_delay;
228 }
229 
FilterBufferLevel(size_t buffer_size_samples)230 void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
231   buffer_level_filter_.SetTargetBufferLevel(
232       delay_manager_->base_target_level());
233 
234   int time_stretched_samples = time_stretched_cn_samples_;
235   if (prev_time_scale_) {
236     time_stretched_samples += sample_memory_;
237     timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
238   }
239 
240   buffer_level_filter_.Update(buffer_size_samples, time_stretched_samples);
241   prev_time_scale_ = false;
242   time_stretched_cn_samples_ = 0;
243 }
244 
CngOperation(NetEq::Mode prev_mode,uint32_t target_timestamp,uint32_t available_timestamp,size_t generated_noise_samples)245 NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode,
246                                              uint32_t target_timestamp,
247                                              uint32_t available_timestamp,
248                                              size_t generated_noise_samples) {
249   // Signed difference between target and available timestamp.
250   int32_t timestamp_diff = static_cast<int32_t>(
251       static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
252       available_timestamp);
253   int32_t optimal_level_samp = static_cast<int32_t>(
254       (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
255   const int64_t excess_waiting_time_samp =
256       -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;
257 
258   if (excess_waiting_time_samp > optimal_level_samp / 2) {
259     // The waiting time for this packet will be longer than 1.5
260     // times the wanted buffer delay. Apply fast-forward to cut the
261     // waiting time down to the optimal.
262     noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
263                                                       excess_waiting_time_samp);
264     timestamp_diff =
265         rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
266   }
267 
268   if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) {
269     // Not time to play this packet yet. Wait another round before using this
270     // packet. Keep on playing CNG from previous CNG parameters.
271     return NetEq::Operation::kRfc3389CngNoPacket;
272   } else {
273     // Otherwise, go for the CNG packet now.
274     noise_fast_forward_ = 0;
275     return NetEq::Operation::kRfc3389Cng;
276   }
277 }
278 
NoPacket(bool play_dtmf)279 NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) {
280   if (cng_state_ == kCngRfc3389On) {
281     // Keep on playing comfort noise.
282     return NetEq::Operation::kRfc3389CngNoPacket;
283   } else if (cng_state_ == kCngInternalOn) {
284     // Keep on playing codec internal comfort noise.
285     return NetEq::Operation::kCodecInternalCng;
286   } else if (play_dtmf) {
287     return NetEq::Operation::kDtmf;
288   } else {
289     // Nothing to play, do expand.
290     return NetEq::Operation::kExpand;
291   }
292 }
293 
ExpectedPacketAvailable(NetEq::Mode prev_mode,bool play_dtmf)294 NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode,
295                                                         bool play_dtmf) {
296   if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand &&
297       !play_dtmf) {
298     // Check criterion for time-stretching. The values are in number of packets
299     // in Q8.
300     int low_limit, high_limit;
301     delay_manager_->BufferLimits(&low_limit, &high_limit);
302     int buffer_level_packets = 0;
303     if (packet_length_samples_ > 0) {
304       buffer_level_packets =
305           ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
306           packet_length_samples_;
307     }
308     if (buffer_level_packets >= high_limit << 2)
309       return NetEq::Operation::kFastAccelerate;
310     if (TimescaleAllowed()) {
311       if (buffer_level_packets >= high_limit)
312         return NetEq::Operation::kAccelerate;
313       if (buffer_level_packets < low_limit)
314         return NetEq::Operation::kPreemptiveExpand;
315     }
316   }
317   return NetEq::Operation::kNormal;
318 }
319 
FuturePacketAvailable(size_t decoder_frame_length,NetEq::Mode prev_mode,uint32_t target_timestamp,uint32_t available_timestamp,bool play_dtmf,size_t generated_noise_samples,size_t span_samples_in_packet_buffer,size_t num_packets_in_packet_buffer)320 NetEq::Operation DecisionLogic::FuturePacketAvailable(
321     size_t decoder_frame_length,
322     NetEq::Mode prev_mode,
323     uint32_t target_timestamp,
324     uint32_t available_timestamp,
325     bool play_dtmf,
326     size_t generated_noise_samples,
327     size_t span_samples_in_packet_buffer,
328     size_t num_packets_in_packet_buffer) {
329   // Required packet is not available, but a future packet is.
330   // Check if we should continue with an ongoing expand because the new packet
331   // is too far into the future.
332   uint32_t timestamp_leap = available_timestamp - target_timestamp;
333   if ((prev_mode == NetEq::Mode::kExpand ||
334        prev_mode == NetEq::Mode::kCodecPlc) &&
335       !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
336       PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
337     if (play_dtmf) {
338       // Still have DTMF to play, so do not do expand.
339       return NetEq::Operation::kDtmf;
340     } else {
341       // Nothing to play.
342       return NetEq::Operation::kExpand;
343     }
344   }
345 
346   if (prev_mode == NetEq::Mode::kCodecPlc) {
347     return NetEq::Operation::kNormal;
348   }
349 
350   // If previous was comfort noise, then no merge is needed.
351   if (prev_mode == NetEq::Mode::kRfc3389Cng ||
352       prev_mode == NetEq::Mode::kCodecInternalCng) {
353     size_t cur_size_samples =
354         estimate_dtx_delay_
355             ? cur_size_samples = span_samples_in_packet_buffer
356             : num_packets_in_packet_buffer * decoder_frame_length;
357     // Target level is in number of packets in Q8.
358     const size_t target_level_samples =
359         (delay_manager_->TargetLevel() * packet_length_samples_) >> 8;
360     const bool generated_enough_noise =
361         static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
362         available_timestamp;
363 
364     if (time_stretch_cn_) {
365       const size_t target_threshold_samples =
366           target_level_window_ms_ / 2 * (sample_rate_ / 1000);
367       const bool above_target_window =
368           cur_size_samples > target_level_samples + target_threshold_samples;
369       const bool below_target_window =
370           target_level_samples > target_threshold_samples &&
371           cur_size_samples < target_level_samples - target_threshold_samples;
372       // Keep the delay same as before CNG, but make sure that it is within the
373       // target window.
374       if ((generated_enough_noise && !below_target_window) ||
375           above_target_window) {
376         time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples;
377         return NetEq::Operation::kNormal;
378       }
379     } else {
380       // Keep the same delay as before the CNG, but make sure that the number of
381       // samples in buffer is no higher than 4 times the optimal level.
382       if (generated_enough_noise ||
383           cur_size_samples > target_level_samples * 4) {
384         // Time to play this new packet.
385         return NetEq::Operation::kNormal;
386       }
387     }
388 
389     // Too early to play this new packet; keep on playing comfort noise.
390     if (prev_mode == NetEq::Mode::kRfc3389Cng) {
391       return NetEq::Operation::kRfc3389CngNoPacket;
392     }
393     // prevPlayMode == kModeCodecInternalCng.
394     return NetEq::Operation::kCodecInternalCng;
395   }
396 
397   // Do not merge unless we have done an expand before.
398   if (prev_mode == NetEq::Mode::kExpand) {
399     return NetEq::Operation::kMerge;
400   } else if (play_dtmf) {
401     // Play DTMF instead of expand.
402     return NetEq::Operation::kDtmf;
403   } else {
404     return NetEq::Operation::kExpand;
405   }
406 }
407 
UnderTargetLevel() const408 bool DecisionLogic::UnderTargetLevel() const {
409   int buffer_level_packets = 0;
410   if (packet_length_samples_ > 0) {
411     buffer_level_packets =
412         ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
413         packet_length_samples_;
414   }
415   return buffer_level_packets <= delay_manager_->TargetLevel();
416 }
417 
ReinitAfterExpands(uint32_t timestamp_leap) const418 bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
419   return timestamp_leap >=
420          static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
421 }
422 
PacketTooEarly(uint32_t timestamp_leap) const423 bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
424   return timestamp_leap >
425          static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
426 }
427 
MaxWaitForPacket() const428 bool DecisionLogic::MaxWaitForPacket() const {
429   return num_consecutive_expands_ >= kMaxWaitForPacket;
430 }
431 
432 }  // namespace webrtc
433