/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_coding/neteq/decision_logic.h"

#include <assert.h>
#include <stdio.h>

#include <string>

#include "absl/types/optional.h"
#include "modules/audio_coding/neteq/packet_buffer.h"
#include "rtc_base/checks.h"
#include "rtc_base/experiments/field_trial_parser.h"
#include "rtc_base/logging.h"
#include "rtc_base/numerics/safe_conversions.h"
#include "system_wrappers/include/field_trial.h"

namespace {

// Percentage applied to the target level when deciding whether to keep
// expanding instead of resuming decoding (see GetDecision()).
constexpr int kPostponeDecodingLevel = 50;
// Default value of the "target_level_window" field-trial parameter.
constexpr int kDefaultTargetLevelWindowMs = 100;

}  // namespace

namespace webrtc {

// Builds the delay manager from |config|, starts the time-scale countdown and
// then lets the "WebRTC-Audio-NetEqDecisionLogicSettings" field trial override
// the three tunable parameters. The resulting settings are logged.
DecisionLogic::DecisionLogic(NetEqController::Config config)
    : delay_manager_(DelayManager::Create(config.max_packets_in_buffer,
                                          config.base_min_delay_ms,
                                          config.enable_rtx_handling,
                                          config.tick_timer)),
      tick_timer_(config.tick_timer),
      disallow_time_stretching_(!config.allow_time_stretching),
      timescale_countdown_(
          tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1)),
      estimate_dtx_delay_("estimate_dtx_delay", false),
      time_stretch_cn_("time_stretch_cn", false),
      target_level_window_ms_("target_level_window",
                              kDefaultTargetLevelWindowMs,
                              0,
                              absl::nullopt) {
  const std::string field_trial_name =
      field_trial::FindFullName("WebRTC-Audio-NetEqDecisionLogicSettings");
  ParseFieldTrial(
      {&estimate_dtx_delay_, &time_stretch_cn_, &target_level_window_ms_},
      field_trial_name);
  RTC_LOG(LS_INFO) << "NetEq decision logic settings:"
                      " estimate_dtx_delay="
                   << estimate_dtx_delay_
                   << " time_stretch_cn=" << time_stretch_cn_
                   << " target_level_window_ms=" << target_level_window_ms_;
}

DecisionLogic::~DecisionLogic() = default;

// Clears the decision state held directly by this object: CNG state, noise
// fast-forward, packet length, time-scale bookkeeping and the expand counter.
// The delay manager and the buffer level filter are not touched here.
void DecisionLogic::Reset() {
  cng_state_ = kCngOff;
  noise_fast_forward_ = 0;
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  timescale_countdown_.reset();
  num_consecutive_expands_ = 0;
  time_stretched_cn_samples_ = 0;
}

// Clears the packet-length and time-scale bookkeeping, restarts the time-scale
// countdown, and resets the delay manager and the buffer level filter. Unlike
// Reset(), the CNG state, noise fast-forward and expand counter are kept.
void DecisionLogic::SoftReset() {
  packet_length_samples_ = 0;
  sample_memory_ = 0;
  prev_time_scale_ = false;
  timescale_countdown_ =
      tick_timer_->GetNewCountdown(kMinTimescaleInterval + 1);
  time_stretched_cn_samples_ = 0;
  delay_manager_->Reset();
  buffer_level_filter_.Reset();
}

// Stores the sample rate and the output block size. |fs_hz| is asserted to be
// one of 8000, 16000, 32000 or 48000.
void DecisionLogic::SetSampleRate(int fs_hz, size_t output_size_samples) {
  // TODO(hlundin): Change to an enumerator and skip assert.
  assert(fs_hz == 8000 || fs_hz == 16000 || fs_hz == 32000 || fs_hz == 48000);
  sample_rate_ = fs_hz;
  output_size_samples_ = output_size_samples;
}

// Chooses the next operation from |status|. Sets |*reset_decoder| to true when
// the number of consecutive expands has exceeded kReinitAfterExpands; the
// pointer is dereferenced unconditionally on that path. Returns
// NetEq::Operation::kUndefined to flag that a reset is needed.
NetEq::Operation DecisionLogic::GetDecision(const NetEqStatus& status,
                                            bool* reset_decoder) {
  // If last mode was CNG (or Expand, since this could be covering up for
  // a lost CNG packet), remember that CNG is on. This is needed if comfort
  // noise is interrupted by DTMF.
  if (status.last_mode == NetEq::Mode::kRfc3389Cng) {
    cng_state_ = kCngRfc3389On;
  } else if (status.last_mode == NetEq::Mode::kCodecInternalCng) {
    cng_state_ = kCngInternalOn;
  }

  size_t cur_size_samples = estimate_dtx_delay_
                                ? status.packet_buffer_info.span_samples
                                : status.packet_buffer_info.num_samples;

  // The time-scale flag only survives if the last mode really was a
  // time-scaling operation.
  prev_time_scale_ =
      prev_time_scale_ &&
      (status.last_mode == NetEq::Mode::kAccelerateSuccess ||
       status.last_mode == NetEq::Mode::kAccelerateLowEnergy ||
       status.last_mode == NetEq::Mode::kPreemptiveExpandSuccess ||
       status.last_mode == NetEq::Mode::kPreemptiveExpandLowEnergy);

  // Do not update buffer history if currently playing CNG since it will bias
  // the filtered buffer level.
  if (status.last_mode != NetEq::Mode::kRfc3389Cng &&
      status.last_mode != NetEq::Mode::kCodecInternalCng &&
      !(status.next_packet && status.next_packet->is_dtx &&
        !estimate_dtx_delay_)) {
    FilterBufferLevel(cur_size_samples);
  }

  // Guard for errors, to avoid getting stuck in error mode.
  if (status.last_mode == NetEq::Mode::kError) {
    if (!status.next_packet) {
      return NetEq::Operation::kExpand;
    } else {
      // Use kUndefined to flag for a reset.
      return NetEq::Operation::kUndefined;
    }
  }

  if (status.next_packet && status.next_packet->is_cng) {
    return CngOperation(status.last_mode, status.target_timestamp,
                        status.next_packet->timestamp,
                        status.generated_noise_samples);
  }

  // Handle the case with no packet at all available (except maybe DTMF).
  if (!status.next_packet) {
    return NoPacket(status.play_dtmf);
  }

  // If the expand period was very long, reset NetEQ since it is likely that the
  // sender was restarted.
  if (num_consecutive_expands_ > kReinitAfterExpands) {
    *reset_decoder = true;
    return NetEq::Operation::kNormal;
  }

  // Make sure we don't restart audio too soon after an expansion to avoid
  // running out of data right away again. We should only wait if there are no
  // DTX or CNG packets in the buffer (otherwise we should just play out what we
  // have, since we cannot know the exact duration of DTX or CNG packets), and
  // if the mute factor is low enough (otherwise the expansion was short enough
  // to not be noticeable).
  // Note that the MuteFactor is in Q14, so a value of 16384 corresponds to 1.
  const size_t current_span =
      estimate_dtx_delay_ ? status.packet_buffer_info.span_samples
                          : status.packet_buffer_info.span_samples_no_dtx;
  // The shift binds tighter than the comparison, so |current_span| is compared
  // against the scaled target level after it has been shifted right by 8.
  if ((status.last_mode == NetEq::Mode::kExpand ||
       status.last_mode == NetEq::Mode::kCodecPlc) &&
      status.expand_mutefactor < 16384 / 2 &&
      current_span < static_cast<size_t>(delay_manager_->TargetLevel() *
                                         packet_length_samples_ *
                                         kPostponeDecodingLevel / 100) >>
          8 &&
      !status.packet_buffer_info.dtx_or_cng) {
    return NetEq::Operation::kExpand;
  }

  const uint32_t five_seconds_samples =
      static_cast<uint32_t>(5 * sample_rate_);
  // Check if the required packet is available.
  if (status.target_timestamp == status.next_packet->timestamp) {
    return ExpectedPacketAvailable(status.last_mode, status.play_dtmf);
  } else if (!PacketBuffer::IsObsoleteTimestamp(status.next_packet->timestamp,
                                                status.target_timestamp,
                                                five_seconds_samples)) {
    return FuturePacketAvailable(
        status.last_packet_samples, status.last_mode, status.target_timestamp,
        status.next_packet->timestamp, status.play_dtmf,
        status.generated_noise_samples, status.packet_buffer_info.span_samples,
        status.packet_buffer_info.num_packets);
  } else {
    // This implies that available_timestamp < target_timestamp, which can
    // happen when a new stream or codec is received. Signal for a reset.
    return NetEq::Operation::kUndefined;
  }
}

// Increments the consecutive-expand counter when |operation| is kExpand and
// clears it for any other operation.
void DecisionLogic::ExpandDecision(NetEq::Operation operation) {
  if (operation == NetEq::Operation::kExpand) {
    num_consecutive_expands_++;
  } else {
    num_consecutive_expands_ = 0;
  }
}

// Forwards packet-arrival information to the delay manager. Returns the value
// produced by DelayManager::Update() when statistics were updated, otherwise
// an empty optional. |fs_hz| is used as a divisor when the packet length
// changes, so it must be non-zero in that case.
absl::optional<int> DecisionLogic::PacketArrived(bool last_cng_or_dtmf,
                                                 size_t packet_length_samples,
                                                 bool should_update_stats,
                                                 uint16_t main_sequence_number,
                                                 uint32_t main_timestamp,
                                                 int fs_hz) {
  delay_manager_->LastDecodedWasCngOrDtmf(last_cng_or_dtmf);
  absl::optional<int> relative_delay;
  if (delay_manager_->last_pack_cng_or_dtmf() == 0) {
    // Calculate the total speech length carried in each packet.
    if (packet_length_samples > 0 &&
        packet_length_samples != packet_length_samples_) {
      packet_length_samples_ = packet_length_samples;
      // Packet duration in milliseconds.
      delay_manager_->SetPacketAudioLength(
          rtc::dchecked_cast<int>((1000 * packet_length_samples) / fs_hz));
    }

    // Update statistics.
    if (should_update_stats) {
      relative_delay =
          delay_manager_->Update(main_sequence_number, main_timestamp, fs_hz);
    }
  } else if (delay_manager_->last_pack_cng_or_dtmf() == -1) {
    // This is first "normal" packet after CNG or DTMF.
    // Reset packet time counter and measure time until next packet,
    // but don't update statistics.
    delay_manager_->set_last_pack_cng_or_dtmf(0);
    delay_manager_->ResetPacketIatCount();
  }
  return relative_delay;
}

// Feeds |buffer_size_samples| into the buffer level filter, together with the
// number of samples added or removed by time stretching since the last update,
// and then clears the time-stretch bookkeeping.
void DecisionLogic::FilterBufferLevel(size_t buffer_size_samples) {
  buffer_level_filter_.SetTargetBufferLevel(
      delay_manager_->base_target_level());

  int time_stretched_samples = time_stretched_cn_samples_;
  if (prev_time_scale_) {
    time_stretched_samples += sample_memory_;
    // A time-scale operation just happened; restart the countdown.
    timescale_countdown_ = tick_timer_->GetNewCountdown(kMinTimescaleInterval);
  }

  buffer_level_filter_.Update(buffer_size_samples, time_stretched_samples);
  prev_time_scale_ = false;
  time_stretched_cn_samples_ = 0;
}

// Decides what to do when the next packet in the buffer is a CNG packet:
// either keep generating noise from the previous parameters
// (kRfc3389CngNoPacket) or consume the new CNG packet (kRfc3389Cng).
NetEq::Operation DecisionLogic::CngOperation(NetEq::Mode prev_mode,
                                             uint32_t target_timestamp,
                                             uint32_t available_timestamp,
                                             size_t generated_noise_samples) {
  // Signed difference between target and available timestamp.
  int32_t timestamp_diff = static_cast<int32_t>(
      static_cast<uint32_t>(generated_noise_samples + target_timestamp) -
      available_timestamp);
  int32_t optimal_level_samp = static_cast<int32_t>(
      (delay_manager_->TargetLevel() * packet_length_samples_) >> 8);
  // Widened to 64 bits so that negating |timestamp_diff| cannot overflow.
  const int64_t excess_waiting_time_samp =
      -static_cast<int64_t>(timestamp_diff) - optimal_level_samp;

  if (excess_waiting_time_samp > optimal_level_samp / 2) {
    // The waiting time for this packet will be longer than 1.5
    // times the wanted buffer delay. Apply fast-forward to cut the
    // waiting time down to the optimal.
    noise_fast_forward_ = rtc::saturated_cast<size_t>(noise_fast_forward_ +
                                                      excess_waiting_time_samp);
    timestamp_diff =
        rtc::saturated_cast<int32_t>(timestamp_diff + excess_waiting_time_samp);
  }

  if (timestamp_diff < 0 && prev_mode == NetEq::Mode::kRfc3389Cng) {
    // Not time to play this packet yet. Wait another round before using this
    // packet. Keep on playing CNG from previous CNG parameters.
    return NetEq::Operation::kRfc3389CngNoPacket;
  } else {
    // Otherwise, go for the CNG packet now.
    noise_fast_forward_ = 0;
    return NetEq::Operation::kRfc3389Cng;
  }
}

// Chooses the operation when the packet buffer holds no packet: continue the
// active comfort noise if any, otherwise play DTMF if requested, otherwise
// expand.
NetEq::Operation DecisionLogic::NoPacket(bool play_dtmf) {
  if (cng_state_ == kCngRfc3389On) {
    // Keep on playing comfort noise.
    return NetEq::Operation::kRfc3389CngNoPacket;
  } else if (cng_state_ == kCngInternalOn) {
    // Keep on playing codec internal comfort noise.
    return NetEq::Operation::kCodecInternalCng;
  } else if (play_dtmf) {
    return NetEq::Operation::kDtmf;
  } else {
    // Nothing to play, do expand.
    return NetEq::Operation::kExpand;
  }
}

// Chooses the operation when the packet matching the target timestamp is
// available: a time-stretching operation if the filtered buffer level is
// outside the delay manager's limits, otherwise kNormal.
NetEq::Operation DecisionLogic::ExpectedPacketAvailable(NetEq::Mode prev_mode,
                                                        bool play_dtmf) {
  if (!disallow_time_stretching_ && prev_mode != NetEq::Mode::kExpand &&
      !play_dtmf) {
    // Check criterion for time-stretching. The values are in number of packets
    // in Q8.
    int low_limit, high_limit;
    delay_manager_->BufferLimits(&low_limit, &high_limit);
    int buffer_level_packets = 0;
    if (packet_length_samples_ > 0) {
      buffer_level_packets =
          ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
          packet_length_samples_;
    }
    // Far above the high limit (4x): accelerate regardless of the countdown.
    if (buffer_level_packets >= high_limit << 2)
      return NetEq::Operation::kFastAccelerate;
    if (TimescaleAllowed()) {
      if (buffer_level_packets >= high_limit)
        return NetEq::Operation::kAccelerate;
      if (buffer_level_packets < low_limit)
        return NetEq::Operation::kPreemptiveExpand;
    }
  }
  return NetEq::Operation::kNormal;
}

// Chooses the operation when the packet matching the target timestamp is
// missing but a later packet is available.
NetEq::Operation DecisionLogic::FuturePacketAvailable(
    size_t decoder_frame_length,
    NetEq::Mode prev_mode,
    uint32_t target_timestamp,
    uint32_t available_timestamp,
    bool play_dtmf,
    size_t generated_noise_samples,
    size_t span_samples_in_packet_buffer,
    size_t num_packets_in_packet_buffer) {
  // Required packet is not available, but a future packet is.
  // Check if we should continue with an ongoing expand because the new packet
  // is too far into the future.
  uint32_t timestamp_leap = available_timestamp - target_timestamp;
  if ((prev_mode == NetEq::Mode::kExpand ||
       prev_mode == NetEq::Mode::kCodecPlc) &&
      !ReinitAfterExpands(timestamp_leap) && !MaxWaitForPacket() &&
      PacketTooEarly(timestamp_leap) && UnderTargetLevel()) {
    if (play_dtmf) {
      // Still have DTMF to play, so do not do expand.
      return NetEq::Operation::kDtmf;
    } else {
      // Nothing to play.
      return NetEq::Operation::kExpand;
    }
  }

  if (prev_mode == NetEq::Mode::kCodecPlc) {
    return NetEq::Operation::kNormal;
  }

  // If previous was comfort noise, then no merge is needed.
  if (prev_mode == NetEq::Mode::kRfc3389Cng ||
      prev_mode == NetEq::Mode::kCodecInternalCng) {
    const size_t cur_size_samples =
        estimate_dtx_delay_
            ? span_samples_in_packet_buffer
            : num_packets_in_packet_buffer * decoder_frame_length;
    // Target level is in number of packets in Q8.
    const size_t target_level_samples =
        (delay_manager_->TargetLevel() * packet_length_samples_) >> 8;
    const bool generated_enough_noise =
        static_cast<uint32_t>(generated_noise_samples + target_timestamp) >=
        available_timestamp;

    if (time_stretch_cn_) {
      // Half of the configured window, converted from ms to samples.
      const size_t target_threshold_samples =
          target_level_window_ms_ / 2 * (sample_rate_ / 1000);
      const bool above_target_window =
          cur_size_samples > target_level_samples + target_threshold_samples;
      // The first condition keeps the unsigned subtraction from wrapping.
      const bool below_target_window =
          target_level_samples > target_threshold_samples &&
          cur_size_samples < target_level_samples - target_threshold_samples;
      // Keep the delay same as before CNG, but make sure that it is within the
      // target window.
      if ((generated_enough_noise && !below_target_window) ||
          above_target_window) {
        time_stretched_cn_samples_ = timestamp_leap - generated_noise_samples;
        return NetEq::Operation::kNormal;
      }
    } else {
      // Keep the same delay as before the CNG, but make sure that the number of
      // samples in buffer is no higher than 4 times the optimal level.
      if (generated_enough_noise ||
          cur_size_samples > target_level_samples * 4) {
        // Time to play this new packet.
        return NetEq::Operation::kNormal;
      }
    }

    // Too early to play this new packet; keep on playing comfort noise.
    if (prev_mode == NetEq::Mode::kRfc3389Cng) {
      return NetEq::Operation::kRfc3389CngNoPacket;
    }
    // prev_mode == NetEq::Mode::kCodecInternalCng.
    return NetEq::Operation::kCodecInternalCng;
  }

  // Do not merge unless we have done an expand before.
  if (prev_mode == NetEq::Mode::kExpand) {
    return NetEq::Operation::kMerge;
  } else if (play_dtmf) {
    // Play DTMF instead of expand.
    return NetEq::Operation::kDtmf;
  } else {
    return NetEq::Operation::kExpand;
  }
}

// Returns true if the filtered buffer level, scaled by 1 << 8 and divided by
// the packet length, does not exceed the delay manager's target level. With
// an unknown (zero) packet length the buffer level is treated as zero.
bool DecisionLogic::UnderTargetLevel() const {
  int buffer_level_packets = 0;
  if (packet_length_samples_ > 0) {
    buffer_level_packets =
        ((1 << 8) * buffer_level_filter_.filtered_current_level()) /
        packet_length_samples_;
  }
  return buffer_level_packets <= delay_manager_->TargetLevel();
}

// Returns true if |timestamp_leap| is at least kReinitAfterExpands output
// blocks long.
bool DecisionLogic::ReinitAfterExpands(uint32_t timestamp_leap) const {
  return timestamp_leap >=
         static_cast<uint32_t>(output_size_samples_ * kReinitAfterExpands);
}

// Returns true if |timestamp_leap| is larger than the number of samples
// covered by the consecutive expands performed so far.
bool DecisionLogic::PacketTooEarly(uint32_t timestamp_leap) const {
  return timestamp_leap >
         static_cast<uint32_t>(output_size_samples_ * num_consecutive_expands_);
}

// Returns true once the number of consecutive expands has reached
// kMaxWaitForPacket.
bool DecisionLogic::MaxWaitForPacket() const {
  return num_consecutive_expands_ >= kMaxWaitForPacket;
}

}  // namespace webrtc