1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
12 #define MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
13 
14 #include <map>
15 #include <memory>
16 #include <string>
17 #include <utility>
18 #include <vector>
19 
20 #include "absl/types/optional.h"
21 #include "api/audio/audio_frame.h"
22 #include "api/neteq/neteq.h"
23 #include "api/neteq/neteq_controller.h"
24 #include "api/neteq/neteq_controller_factory.h"
25 #include "api/neteq/tick_timer.h"
26 #include "api/rtp_packet_info.h"
27 #include "modules/audio_coding/neteq/audio_multi_vector.h"
28 #include "modules/audio_coding/neteq/expand_uma_logger.h"
29 #include "modules/audio_coding/neteq/packet.h"
30 #include "modules/audio_coding/neteq/random_vector.h"
31 #include "modules/audio_coding/neteq/statistics_calculator.h"
32 #include "rtc_base/constructor_magic.h"
33 #include "rtc_base/synchronization/mutex.h"
34 #include "rtc_base/thread_annotations.h"
35 
36 namespace webrtc {
37 
38 // Forward declarations. Within this header these types are only named through
   // pointers, references or std::unique_ptr, so their full definitions are not
   // required here. The one exception is RandomVector, which NetEqImpl holds by
   // value (random_vector_); its definition presumably comes from the
   // random_vector.h include above, which would make the forward declaration
   // below redundant -- confirm before removing it.
39 class Accelerate;
40 class BackgroundNoise;
41 class Clock;
42 class ComfortNoise;
43 class DecoderDatabase;
44 class DtmfBuffer;
45 class DtmfToneGenerator;
46 class Expand;
47 class Merge;
48 class NackTracker;
49 class Normal;
50 class PacketBuffer;
51 class RedPayloadSplitter;
52 class PostDecodeVad;
53 class PreemptiveExpand;
54 class RandomVector;
55 class SyncBuffer;
56 class TimestampScaler;
57 struct AccelerateFactory;
58 struct DtmfEvent;
59 struct ExpandFactory;
60 struct PreemptiveExpandFactory;
61 
// Concrete class deriving from webrtc::NetEq. The public methods marked
// `override` implement that interface. The protected helpers are annotated
// RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_), i.e. they expect the caller to already
// hold |mutex_|, and almost all data members are RTC_GUARDED_BY(mutex_).
// Helpers and data members are protected rather than private, and
// UpdatePlcComponents() is virtual, so the class can be subclassed.
// The declaration order of the data members determines their construction and
// destruction order; do not reorder them without checking the constructor.
62 class NetEqImpl : public webrtc::NetEq {
63  public:
     // Categories of audio that GetAudio() can produce; see LastOutputType().
64   enum class OutputType {
65     kNormalSpeech,
66     kPLC,
67     kCNG,
68     kPLCCNG,
69     kVadPassive,
70     kCodecPLC
71   };
72 
     // Internal error codes. kNoError is 0 and the remaining enumerators take
     // consecutive values after it. Presumably these are the "error code" values
     // returned by the protected helpers below -- confirm in the implementation.
73   enum ErrorCodes {
74     kNoError = 0,
75     kOtherError,
76     kUnknownRtpPayloadType,
77     kDecoderNotFound,
78     kInvalidPointer,
79     kAccelerateError,
80     kPreemptiveExpandError,
81     kComfortNoiseErrorCode,
82     kDecoderErrorCode,
83     kOtherDecoderError,
84     kInvalidOperation,
85     kDtmfParsingError,
86     kDtmfInsertError,
87     kSampleUnderrun,
88     kDecodedTooMuch,
89     kRedundancySplitError,
90     kPacketBufferCorruption
91   };
92 
     // Bundle of collaborator objects that is handed to the NetEqImpl
     // constructor. |clock| is a raw pointer; every other member is owned through
     // std::unique_ptr.
93   struct Dependencies {
94     // The constructor populates the Dependencies struct with the default
95     // implementations of the objects. They can all be replaced by the user
96     // before sending the struct to the NetEqImpl constructor. However, there
97     // are dependencies between some of the classes inside the struct, so
98     // swapping out one may make it necessary to re-create another one.
99     Dependencies(const NetEq::Config& config,
100                  Clock* clock,
101                  const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory,
102                  const NetEqControllerFactory& controller_factory);
103     ~Dependencies();
104 
105     Clock* const clock;
106     std::unique_ptr<TickTimer> tick_timer;
107     std::unique_ptr<StatisticsCalculator> stats;
108     std::unique_ptr<DecoderDatabase> decoder_database;
109     std::unique_ptr<DtmfBuffer> dtmf_buffer;
110     std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator;
111     std::unique_ptr<PacketBuffer> packet_buffer;
112     std::unique_ptr<NetEqController> neteq_controller;
113     std::unique_ptr<RedPayloadSplitter> red_payload_splitter;
114     std::unique_ptr<TimestampScaler> timestamp_scaler;
115     std::unique_ptr<AccelerateFactory> accelerate_factory;
116     std::unique_ptr<ExpandFactory> expand_factory;
117     std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory;
118   };
119 
120   // Creates a new NetEqImpl object.
      // |deps| is taken by rvalue reference. |create_components| defaults to true;
      // NOTE(review): its effect is not documented in this header -- see the
      // constructor definition.
121   NetEqImpl(const NetEq::Config& config,
122             Dependencies&& deps,
123             bool create_components = true);
124 
125   ~NetEqImpl() override;
126 
127   // Inserts a new packet into NetEq. Returns 0 on success, -1 on failure.
128   int InsertPacket(const RTPHeader& rtp_header,
129                    rtc::ArrayView<const uint8_t> payload) override;
130 
131   void InsertEmptyPacket(const RTPHeader& rtp_header) override;
132 
      // Takes the same arguments as GetAudioInternal() below and presumably
      // forwards to it -- confirm in the implementation. |action_override|
      // defaults to absl::nullopt.
133   int GetAudio(
134       AudioFrame* audio_frame,
135       bool* muted,
136       absl::optional<Operation> action_override = absl::nullopt) override;
137 
138   void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) override;
139 
140   bool RegisterPayloadType(int rtp_payload_type,
141                            const SdpAudioFormat& audio_format) override;
142 
143   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
144   // -1 on failure.
145   int RemovePayloadType(uint8_t rtp_payload_type) override;
146 
147   void RemoveAllPayloadTypes() override;
148 
149   bool SetMinimumDelay(int delay_ms) override;
150 
151   bool SetMaximumDelay(int delay_ms) override;
152 
153   bool SetBaseMinimumDelayMs(int delay_ms) override;
154 
155   int GetBaseMinimumDelayMs() const override;
156 
157   int TargetDelayMs() const override;
158 
159   int FilteredCurrentDelayMs() const override;
160 
161   // Writes the current network statistics to |stats|. The statistics are reset
162   // after the call.
163   int NetworkStatistics(NetEqNetworkStatistics* stats) override;
164 
165   NetEqLifetimeStatistics GetLifetimeStatistics() const override;
166 
167   NetEqOperationsAndState GetOperationsAndState() const override;
168 
169   // Enables post-decode VAD. When enabled, GetAudio() will return
170   // kOutputVADPassive when the signal contains no speech.
      // NOTE(review): no enumerator named kOutputVADPassive is declared in this
      // header; the closest names visible here are OutputType::kVadPassive and
      // AudioFrame::kVadPassive -- confirm which one is meant.
171   void EnableVad() override;
172 
173   // Disables post-decode VAD.
174   void DisableVad() override;
175 
176   absl::optional<uint32_t> GetPlayoutTimestamp() const override;
177 
178   int last_output_sample_rate_hz() const override;
179 
180   absl::optional<DecoderFormat> GetDecoderFormat(
181       int payload_type) const override;
182 
183   // Flushes both the packet buffer and the sync buffer.
184   void FlushBuffers() override;
185 
186   void EnableNack(size_t max_nack_list_size) override;
187 
188   void DisableNack() override;
189 
190   std::vector<uint16_t> GetNackList(int64_t round_trip_time_ms) const override;
191 
192   std::vector<uint32_t> LastDecodedTimestamps() const override;
193 
194   int SyncBufferSizeMs() const override;
195 
196   // These accessor methods are only intended for testing purposes.
197   const SyncBuffer* sync_buffer_for_test() const;
198   Operation last_operation_for_test() const;
199 
200  protected:
      // 10 ms; matches the amount of audio that GetAudioInternal() is documented
      // to deliver per call.
201   static const int kOutputSizeMs = 10;
202   static const size_t kMaxFrameSize = 5760;  // 120 ms @ 48 kHz.
203   // TODO(hlundin): Provide a better value for kSyncBufferSize.
204   // Current value is kMaxFrameSize + 60 ms * 48 kHz, which is enough for
205   // calculating correlations of current frame against history.
206   static const size_t kSyncBufferSize = kMaxFrameSize + 60 * 48;
207 
      // All helper methods below are annotated
      // RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_): the caller must hold |mutex_|.

208   // Inserts a new packet into NetEq. This is used by the InsertPacket method
209   // above. Returns 0 on success, otherwise an error code.
210   // TODO(hlundin): Merge this with InsertPacket above?
211   int InsertPacketInternal(const RTPHeader& rtp_header,
212                            rtc::ArrayView<const uint8_t> payload)
213       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
214 
215   // Delivers 10 ms of audio data. The data is written to |audio_frame|.
216   // Returns 0 on success, otherwise an error code.
217   int GetAudioInternal(AudioFrame* audio_frame,
218                        bool* muted,
219                        absl::optional<Operation> action_override)
220       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
221 
222   // Provides a decision to the GetAudioInternal method. The decision what to
223   // do is written to |operation|. Packets to decode are written to
224   // |packet_list|, and a DTMF event to play is written to |dtmf_event|. When
225   // DTMF should be played, |play_dtmf| is set to true by the method.
226   // Returns 0 on success, otherwise an error code.
227   int GetDecision(Operation* operation,
228                   PacketList* packet_list,
229                   DtmfEvent* dtmf_event,
230                   bool* play_dtmf,
231                   absl::optional<Operation> action_override)
232       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
233 
234   // Decodes the speech packets in |packet_list|, and writes the results to
235   // |decoded_buffer|, which is allocated to hold |decoded_buffer_length|
236   // elements. The length of the decoded data is written to |decoded_length|.
237   // The speech type -- speech or (codec-internal) comfort noise -- is written
238   // to |speech_type|. If |packet_list| contains any SID frames for RFC 3389
239   // comfort noise, those are not decoded.
      // NOTE(review): |decoded_buffer| and |decoded_buffer_length| are not
      // parameters of this method; presumably the data members |decoded_buffer_|
      // and |decoded_buffer_length_| are meant -- confirm.
240   int Decode(PacketList* packet_list,
241              Operation* operation,
242              int* decoded_length,
243              AudioDecoder::SpeechType* speech_type)
244       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
245 
246   // Sub-method to Decode(). Performs codec internal CNG.
247   int DecodeCng(AudioDecoder* decoder,
248                 int* decoded_length,
249                 AudioDecoder::SpeechType* speech_type)
250       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
251 
252   // Sub-method to Decode(). Performs the actual decoding.
253   int DecodeLoop(PacketList* packet_list,
254                  const Operation& operation,
255                  AudioDecoder* decoder,
256                  int* decoded_length,
257                  AudioDecoder::SpeechType* speech_type)
258       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
259 
260   // Sub-method which calls the Normal class to perform the normal operation.
261   void DoNormal(const int16_t* decoded_buffer,
262                 size_t decoded_length,
263                 AudioDecoder::SpeechType speech_type,
264                 bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
265 
266   // Sub-method which calls the Merge class to perform the merge operation.
267   void DoMerge(int16_t* decoded_buffer,
268                size_t decoded_length,
269                AudioDecoder::SpeechType speech_type,
270                bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
271 
      // NOTE(review): undocumented in this header; the meaning of the bool result
      // is defined by the implementation file.
272   bool DoCodecPlc() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
273 
274   // Sub-method which calls the Expand class to perform the expand operation.
275   int DoExpand(bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
276 
277   // Sub-method which calls the Accelerate class to perform the accelerate
278   // operation.
279   int DoAccelerate(int16_t* decoded_buffer,
280                    size_t decoded_length,
281                    AudioDecoder::SpeechType speech_type,
282                    bool play_dtmf,
283                    bool fast_accelerate) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
284 
285   // Sub-method which calls the PreemptiveExpand class to perform the
286   // preemptive expand operation.
287   int DoPreemptiveExpand(int16_t* decoded_buffer,
288                          size_t decoded_length,
289                          AudioDecoder::SpeechType speech_type,
290                          bool play_dtmf) RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
291 
292   // Sub-method which calls the ComfortNoise class to generate RFC 3389 comfort
293   // noise. |packet_list| can either contain one SID frame to update the
294   // noise parameters, or no payload at all, in which case the previously
295   // received parameters are used.
296   int DoRfc3389Cng(PacketList* packet_list, bool play_dtmf)
297       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
298 
299   // Calls the audio decoder to generate codec-internal comfort noise when
300   // no packet was received.
301   void DoCodecInternalCng(const int16_t* decoded_buffer, size_t decoded_length)
302       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
303 
304   // Calls the DtmfToneGenerator class to generate DTMF tones.
305   int DoDtmf(const DtmfEvent& dtmf_event, bool* play_dtmf)
306       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
307 
308   // Overdub DTMF on top of |output|.
309   int DtmfOverdub(const DtmfEvent& dtmf_event,
310                   size_t num_channels,
311                   int16_t* output) const RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
312 
313   // Extracts packets from |packet_buffer_| to produce at least
314   // |required_samples| samples. The packets are inserted into |packet_list|.
315   // Returns the number of samples that the packets in the list will produce, or
316   // -1 in case of an error.
317   int ExtractPackets(size_t required_samples, PacketList* packet_list)
318       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
319 
320   // Resets various variables and objects to new values based on the sample rate
321   // |fs_hz| and the number of audio channels |channels|.
322   void SetSampleRateAndChannels(int fs_hz, size_t channels)
323       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
324 
325   // Returns the output type for the audio produced by the latest call to
326   // GetAudio().
327   OutputType LastOutputType() RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
328 
329   // Updates Expand and Merge.
      // Declared virtual, so a subclass may override it.
330   virtual void UpdatePlcComponents(int fs_hz, size_t channels)
331       RTC_EXCLUSIVE_LOCKS_REQUIRED(mutex_);
332 
      // Const pointer; the only data member without an RTC_GUARDED_BY annotation
      // apart from |mutex_| itself.
333   Clock* const clock_;
334 
      // Guards every member below that is annotated RTC_GUARDED_BY(mutex_).
      // Declared mutable, which permits locking it from const member functions.
335   mutable Mutex mutex_;
      // Components held through const std::unique_ptr: the pointers cannot be
      // reseated after construction.
336   const std::unique_ptr<TickTimer> tick_timer_ RTC_GUARDED_BY(mutex_);
337   const std::unique_ptr<DecoderDatabase> decoder_database_
338       RTC_GUARDED_BY(mutex_);
339   const std::unique_ptr<DtmfBuffer> dtmf_buffer_ RTC_GUARDED_BY(mutex_);
340   const std::unique_ptr<DtmfToneGenerator> dtmf_tone_generator_
341       RTC_GUARDED_BY(mutex_);
342   const std::unique_ptr<PacketBuffer> packet_buffer_ RTC_GUARDED_BY(mutex_);
343   const std::unique_ptr<RedPayloadSplitter> red_payload_splitter_
344       RTC_GUARDED_BY(mutex_);
345   const std::unique_ptr<TimestampScaler> timestamp_scaler_
346       RTC_GUARDED_BY(mutex_);
347   const std::unique_ptr<PostDecodeVad> vad_ RTC_GUARDED_BY(mutex_);
348   const std::unique_ptr<ExpandFactory> expand_factory_ RTC_GUARDED_BY(mutex_);
349   const std::unique_ptr<AccelerateFactory> accelerate_factory_
350       RTC_GUARDED_BY(mutex_);
351   const std::unique_ptr<PreemptiveExpandFactory> preemptive_expand_factory_
352       RTC_GUARDED_BY(mutex_);
353   const std::unique_ptr<StatisticsCalculator> stats_ RTC_GUARDED_BY(mutex_);
354 
      // Remaining state. The std::unique_ptr members from here on are non-const
      // and can therefore be replaced after construction (presumably when the
      // sample rate or channel count changes, cf. SetSampleRateAndChannels() and
      // UpdatePlcComponents() -- confirm in the implementation).
355   std::unique_ptr<BackgroundNoise> background_noise_ RTC_GUARDED_BY(mutex_);
356   std::unique_ptr<NetEqController> controller_ RTC_GUARDED_BY(mutex_);
357   std::unique_ptr<AudioMultiVector> algorithm_buffer_ RTC_GUARDED_BY(mutex_);
358   std::unique_ptr<SyncBuffer> sync_buffer_ RTC_GUARDED_BY(mutex_);
359   std::unique_ptr<Expand> expand_ RTC_GUARDED_BY(mutex_);
360   std::unique_ptr<Normal> normal_ RTC_GUARDED_BY(mutex_);
361   std::unique_ptr<Merge> merge_ RTC_GUARDED_BY(mutex_);
362   std::unique_ptr<Accelerate> accelerate_ RTC_GUARDED_BY(mutex_);
363   std::unique_ptr<PreemptiveExpand> preemptive_expand_ RTC_GUARDED_BY(mutex_);
364   RandomVector random_vector_ RTC_GUARDED_BY(mutex_);
365   std::unique_ptr<ComfortNoise> comfort_noise_ RTC_GUARDED_BY(mutex_);
366   int fs_hz_ RTC_GUARDED_BY(mutex_);
367   int fs_mult_ RTC_GUARDED_BY(mutex_);
368   int last_output_sample_rate_hz_ RTC_GUARDED_BY(mutex_);
369   size_t output_size_samples_ RTC_GUARDED_BY(mutex_);
370   size_t decoder_frame_length_ RTC_GUARDED_BY(mutex_);
371   Mode last_mode_ RTC_GUARDED_BY(mutex_);
372   Operation last_operation_ RTC_GUARDED_BY(mutex_);
373   size_t decoded_buffer_length_ RTC_GUARDED_BY(mutex_);
374   std::unique_ptr<int16_t[]> decoded_buffer_ RTC_GUARDED_BY(mutex_);
375   uint32_t playout_timestamp_ RTC_GUARDED_BY(mutex_);
376   bool new_codec_ RTC_GUARDED_BY(mutex_);
377   uint32_t timestamp_ RTC_GUARDED_BY(mutex_);
378   bool reset_decoder_ RTC_GUARDED_BY(mutex_);
379   absl::optional<uint8_t> current_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
380   absl::optional<uint8_t> current_cng_rtp_payload_type_ RTC_GUARDED_BY(mutex_);
381   bool first_packet_ RTC_GUARDED_BY(mutex_);
382   bool enable_fast_accelerate_ RTC_GUARDED_BY(mutex_);
383   std::unique_ptr<NackTracker> nack_ RTC_GUARDED_BY(mutex_);
384   bool nack_enabled_ RTC_GUARDED_BY(mutex_);
385   const bool enable_muted_state_ RTC_GUARDED_BY(mutex_);
386   AudioFrame::VADActivity last_vad_activity_ RTC_GUARDED_BY(mutex_) =
387       AudioFrame::kVadPassive;
388   std::unique_ptr<TickTimer::Stopwatch> generated_noise_stopwatch_
389       RTC_GUARDED_BY(mutex_);
390   std::vector<uint32_t> last_decoded_timestamps_ RTC_GUARDED_BY(mutex_);
391   std::vector<RtpPacketInfo> last_decoded_packet_infos_ RTC_GUARDED_BY(mutex_);
392   ExpandUmaLogger expand_uma_logger_ RTC_GUARDED_BY(mutex_);
393   ExpandUmaLogger speech_expand_uma_logger_ RTC_GUARDED_BY(mutex_);
394   bool no_time_stretching_ RTC_GUARDED_BY(mutex_);  // Only used for test.
395   rtc::BufferT<int16_t> concealment_audio_ RTC_GUARDED_BY(mutex_);
396   const bool enable_rtx_handling_ RTC_GUARDED_BY(mutex_);
397   // Data members used for adding extra delay to the output of NetEq.
398   // The delay in ms (which is 10 times the number of elements in
399   // output_delay_chain_).
400   const int output_delay_chain_ms_ RTC_GUARDED_BY(mutex_);
401   // Vector of AudioFrames which contains the delayed audio. Accessed as a
402   // circular buffer.
403   std::vector<AudioFrame> output_delay_chain_ RTC_GUARDED_BY(mutex_);
404   // Index into output_delay_chain_.
405   size_t output_delay_chain_ix_ RTC_GUARDED_BY(mutex_) = 0;
406   // Did output_delay_chain_ get populated yet?
407   bool output_delay_chain_empty_ RTC_GUARDED_BY(mutex_) = true;
408   // Contains the sample rate of the AudioFrame last emitted from the delay
409   // chain. If the extra output delay chain is not used, or if no audio has been
410   // emitted yet, the variable is empty.
411   absl::optional<int> delayed_last_output_sample_rate_hz_
412       RTC_GUARDED_BY(mutex_);
413 
414  private:
      // Disallows copy construction and copy assignment, as the macro name states.
415   RTC_DISALLOW_COPY_AND_ASSIGN(NetEqImpl);
416 };
417 
418 }  // namespace webrtc
419 #endif  // MODULES_AUDIO_CODING_NETEQ_NETEQ_IMPL_H_
420