1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #ifndef API_NETEQ_NETEQ_H_
12 #define API_NETEQ_NETEQ_H_
13 
14 #include <stddef.h>  // Provide access to size_t.
15 
16 #include <map>
17 #include <string>
18 #include <vector>
19 
20 #include "absl/types/optional.h"
21 #include "api/audio_codecs/audio_codec_pair_id.h"
22 #include "api/audio_codecs/audio_decoder.h"
23 #include "api/audio_codecs/audio_format.h"
24 #include "api/rtp_headers.h"
25 #include "api/scoped_refptr.h"
26 
27 namespace webrtc {
28 
// Types that this header only refers to through pointers or references; they
// are declared here instead of pulling in their full definitions.
class AudioDecoderFactory;
class AudioFrame;
class Clock;
33 
// Network-related statistics reported by NetEq.
//
// Fields documented as "Q14" are fixed-point fractions with 14 fractional
// bits, i.e. the stored value is the fraction multiplied by 2^14.
//
// Every member has a zero default so that a default-constructed instance never
// exposes indeterminate values to the reader.
struct NetEqNetworkStatistics {
  // Current jitter buffer size in ms.
  uint16_t current_buffer_size_ms = 0;
  // Target buffer size in ms.
  uint16_t preferred_buffer_size_ms = 0;
  // 1 if adding extra delay due to peaky jitter; 0 otherwise.
  uint16_t jitter_peaks_found = 0;
  // Loss rate (network + late) in Q14.
  uint16_t packet_loss_rate = 0;
  // Fraction (of original stream) of synthesized audio inserted through
  // expansion (in Q14).
  uint16_t expand_rate = 0;
  // Fraction (of original stream) of synthesized speech inserted through
  // expansion (in Q14).
  uint16_t speech_expand_rate = 0;
  // Fraction of data inserted through pre-emptive expansion (in Q14).
  uint16_t preemptive_rate = 0;
  // Fraction of data removed through acceleration (in Q14).
  uint16_t accelerate_rate = 0;
  // Fraction of data coming from FEC/RED decoding (in Q14).
  uint16_t secondary_decoded_rate = 0;
  // Fraction of discarded FEC/RED data (in Q14).
  uint16_t secondary_discarded_rate = 0;
  // Number of zero samples added in "off" mode.
  size_t added_zero_samples = 0;
  // Statistics for packet waiting times, i.e., the time from when a packet
  // arrives until it is decoded.
  int mean_waiting_time_ms = 0;
  int median_waiting_time_ms = 0;
  int min_waiting_time_ms = 0;
  int max_waiting_time_ms = 0;
};
60 
// Cumulative NetEq statistics. They cover the whole lifetime of the NetEq
// instance and are never reset.
struct NetEqLifetimeStatistics {
  // The following counters correspond to similarly-named fields in the WebRTC
  // stats spec:
  // https://w3c.github.io/webrtc-stats/#dom-rtcmediastreamtrackstats
  uint64_t total_samples_received{0};
  uint64_t concealed_samples{0};
  uint64_t concealment_events{0};
  uint64_t jitter_buffer_delay_ms{0};
  uint64_t jitter_buffer_emitted_count{0};
  uint64_t jitter_buffer_target_delay_ms{0};
  uint64_t inserted_samples_for_deceleration{0};
  uint64_t removed_samples_for_acceleration{0};
  uint64_t silent_concealed_samples{0};
  uint64_t fec_packets_received{0};
  uint64_t fec_packets_discarded{0};

  // The remaining counters are not part of the spec.
  uint64_t delayed_packet_outage_samples{0};
  // Sum of the relative packet arrival delays of all packets received so far.
  // End-to-end packet delay is difficult to measure and not necessarily useful
  // for judging jitter buffer performance, so a relative delay is reported
  // instead: the arrival delay of a packet compared to the first packet
  // received, which is taken to have zero delay. To avoid clock drift, the
  // "first" packet can be made dynamic.
  uint64_t relative_packet_arrival_delay_ms{0};
  uint64_t jitter_buffer_packets_received{0};
  // An interruption is a loss-concealment event lasting at least 150 ms. The
  // two fields below hold the number of such events and their total duration.
  int32_t interruption_count{0};
  int32_t total_interruption_duration_ms{0};
};
94 
// Describes the operations NetEq has performed, together with a view of its
// internal state.
struct NetEqOperationsAndState {
  // Cumulative sample counters; these never reset. For reference, the total
  // number of output samples is available in
  // NetEqLifetimeStatistics::total_samples_received.
  uint64_t preemptive_samples{0};
  uint64_t accelerate_samples{0};
  // Number of buffer flushes.
  uint64_t packet_buffer_flushes{0};
  // Number of primary packets that were discarded.
  uint64_t discarded_primary_packets{0};

  // The fields below are not cumulative.
  // Waiting time of the last decoded packet.
  uint64_t last_waiting_time_ms{0};
  // Sum of the packet buffer and jitter buffer sizes, in ms.
  uint64_t current_buffer_size_ms{0};
  // Current frame size in ms.
  uint64_t current_frame_size_ms{0};
  // True when the next packet is available.
  bool next_packet_available{false};
};
117 
118 // This is the interface class for NetEq.
119 class NetEq {
120  public:
121   struct Config {
122     Config();
123     Config(const Config&);
124     Config(Config&&);
125     ~Config();
126     Config& operator=(const Config&);
127     Config& operator=(Config&&);
128 
129     std::string ToString() const;
130 
131     int sample_rate_hz = 16000;  // Initial value. Will change with input data.
132     bool enable_post_decode_vad = false;
133     size_t max_packets_in_buffer = 200;
134     int max_delay_ms = 0;
135     int min_delay_ms = 0;
136     bool enable_fast_accelerate = false;
137     bool enable_muted_state = false;
138     bool enable_rtx_handling = false;
139     absl::optional<AudioCodecPairId> codec_pair_id;
140     bool for_test_no_time_stretching = false;  // Use only for testing.
141     // Adds extra delay to the output of NetEq, without affecting jitter or
142     // loss behavior. This is mainly for testing. Value must be a non-negative
143     // multiple of 10 ms.
144     int extra_output_delay_ms = 0;
145   };
146 
147   enum ReturnCodes { kOK = 0, kFail = -1 };
148 
149   enum class Operation {
150     kNormal,
151     kMerge,
152     kExpand,
153     kAccelerate,
154     kFastAccelerate,
155     kPreemptiveExpand,
156     kRfc3389Cng,
157     kRfc3389CngNoPacket,
158     kCodecInternalCng,
159     kDtmf,
160     kUndefined,
161   };
162 
163   enum class Mode {
164     kNormal,
165     kExpand,
166     kMerge,
167     kAccelerateSuccess,
168     kAccelerateLowEnergy,
169     kAccelerateFail,
170     kPreemptiveExpandSuccess,
171     kPreemptiveExpandLowEnergy,
172     kPreemptiveExpandFail,
173     kRfc3389Cng,
174     kCodecInternalCng,
175     kCodecPlc,
176     kDtmf,
177     kError,
178     kUndefined,
179   };
180 
181   // Return type for GetDecoderFormat.
182   struct DecoderFormat {
183     int sample_rate_hz;
184     int num_channels;
185     SdpAudioFormat sdp_format;
186   };
187 
188   // Creates a new NetEq object, with parameters set in |config|. The |config|
189   // object will only have to be valid for the duration of the call to this
190   // method.
191   static NetEq* Create(
192       const NetEq::Config& config,
193       Clock* clock,
194       const rtc::scoped_refptr<AudioDecoderFactory>& decoder_factory);
195 
~NetEq()196   virtual ~NetEq() {}
197 
198   // Inserts a new packet into NetEq.
199   // Returns 0 on success, -1 on failure.
200   virtual int InsertPacket(const RTPHeader& rtp_header,
201                            rtc::ArrayView<const uint8_t> payload) = 0;
202 
203   // Lets NetEq know that a packet arrived with an empty payload. This typically
204   // happens when empty packets are used for probing the network channel, and
205   // these packets use RTP sequence numbers from the same series as the actual
206   // audio packets.
207   virtual void InsertEmptyPacket(const RTPHeader& rtp_header) = 0;
208 
209   // Instructs NetEq to deliver 10 ms of audio data. The data is written to
210   // |audio_frame|. All data in |audio_frame| is wiped; |data_|, |speech_type_|,
211   // |num_channels_|, |sample_rate_hz_|, |samples_per_channel_|, and
212   // |vad_activity_| are updated upon success. If an error is returned, some
213   // fields may not have been updated, or may contain inconsistent values.
214   // If muted state is enabled (through Config::enable_muted_state), |muted|
215   // may be set to true after a prolonged expand period. When this happens, the
216   // |data_| in |audio_frame| is not written, but should be interpreted as being
217   // all zeros. For testing purposes, an override can be supplied in the
218   // |action_override| argument, which will cause NetEq to take this action
219   // next, instead of the action it would normally choose.
220   // Returns kOK on success, or kFail in case of an error.
221   virtual int GetAudio(
222       AudioFrame* audio_frame,
223       bool* muted,
224       absl::optional<Operation> action_override = absl::nullopt) = 0;
225 
226   // Replaces the current set of decoders with the given one.
227   virtual void SetCodecs(const std::map<int, SdpAudioFormat>& codecs) = 0;
228 
229   // Associates |rtp_payload_type| with the given codec, which NetEq will
230   // instantiate when it needs it. Returns true iff successful.
231   virtual bool RegisterPayloadType(int rtp_payload_type,
232                                    const SdpAudioFormat& audio_format) = 0;
233 
234   // Removes |rtp_payload_type| from the codec database. Returns 0 on success,
235   // -1 on failure. Removing a payload type that is not registered is ok and
236   // will not result in an error.
237   virtual int RemovePayloadType(uint8_t rtp_payload_type) = 0;
238 
239   // Removes all payload types from the codec database.
240   virtual void RemoveAllPayloadTypes() = 0;
241 
242   // Sets a minimum delay in millisecond for packet buffer. The minimum is
243   // maintained unless a higher latency is dictated by channel condition.
244   // Returns true if the minimum is successfully applied, otherwise false is
245   // returned.
246   virtual bool SetMinimumDelay(int delay_ms) = 0;
247 
248   // Sets a maximum delay in milliseconds for packet buffer. The latency will
249   // not exceed the given value, even required delay (given the channel
250   // conditions) is higher. Calling this method has the same effect as setting
251   // the |max_delay_ms| value in the NetEq::Config struct.
252   virtual bool SetMaximumDelay(int delay_ms) = 0;
253 
254   // Sets a base minimum delay in milliseconds for packet buffer. The minimum
255   // delay which is set via |SetMinimumDelay| can't be lower than base minimum
256   // delay. Calling this method is similar to setting the |min_delay_ms| value
257   // in the NetEq::Config struct. Returns true if the base minimum is
258   // successfully applied, otherwise false is returned.
259   virtual bool SetBaseMinimumDelayMs(int delay_ms) = 0;
260 
261   // Returns current value of base minimum delay in milliseconds.
262   virtual int GetBaseMinimumDelayMs() const = 0;
263 
264   // Returns the current target delay in ms. This includes any extra delay
265   // requested through SetMinimumDelay.
266   virtual int TargetDelayMs() const = 0;
267 
268   // Returns the current total delay (packet buffer and sync buffer) in ms,
269   // with smoothing applied to even out short-time fluctuations due to jitter.
270   // The packet buffer part of the delay is not updated during DTX/CNG periods.
271   virtual int FilteredCurrentDelayMs() const = 0;
272 
273   // Writes the current network statistics to |stats|. The statistics are reset
274   // after the call.
275   virtual int NetworkStatistics(NetEqNetworkStatistics* stats) = 0;
276 
277   // Returns a copy of this class's lifetime statistics. These statistics are
278   // never reset.
279   virtual NetEqLifetimeStatistics GetLifetimeStatistics() const = 0;
280 
281   // Returns statistics about the performed operations and internal state. These
282   // statistics are never reset.
283   virtual NetEqOperationsAndState GetOperationsAndState() const = 0;
284 
285   // Enables post-decode VAD. When enabled, GetAudio() will return
286   // kOutputVADPassive when the signal contains no speech.
287   virtual void EnableVad() = 0;
288 
289   // Disables post-decode VAD.
290   virtual void DisableVad() = 0;
291 
292   // Returns the RTP timestamp for the last sample delivered by GetAudio().
293   // The return value will be empty if no valid timestamp is available.
294   virtual absl::optional<uint32_t> GetPlayoutTimestamp() const = 0;
295 
296   // Returns the sample rate in Hz of the audio produced in the last GetAudio
297   // call. If GetAudio has not been called yet, the configured sample rate
298   // (Config::sample_rate_hz) is returned.
299   virtual int last_output_sample_rate_hz() const = 0;
300 
301   // Returns the decoder info for the given payload type. Returns empty if no
302   // such payload type was registered.
303   virtual absl::optional<DecoderFormat> GetDecoderFormat(
304       int payload_type) const = 0;
305 
306   // Flushes both the packet buffer and the sync buffer.
307   virtual void FlushBuffers() = 0;
308 
309   // Enables NACK and sets the maximum size of the NACK list, which should be
310   // positive and no larger than Nack::kNackListSizeLimit. If NACK is already
311   // enabled then the maximum NACK list size is modified accordingly.
312   virtual void EnableNack(size_t max_nack_list_size) = 0;
313 
314   virtual void DisableNack() = 0;
315 
316   // Returns a list of RTP sequence numbers corresponding to packets to be
317   // retransmitted, given an estimate of the round-trip time in milliseconds.
318   virtual std::vector<uint16_t> GetNackList(
319       int64_t round_trip_time_ms) const = 0;
320 
321   // Returns a vector containing the timestamps of the packets that were decoded
322   // in the last GetAudio call. If no packets were decoded in the last call, the
323   // vector is empty.
324   // Mainly intended for testing.
325   virtual std::vector<uint32_t> LastDecodedTimestamps() const = 0;
326 
327   // Returns the length of the audio yet to play in the sync buffer.
328   // Mainly intended for testing.
329   virtual int SyncBufferSizeMs() const = 0;
330 };
331 
332 }  // namespace webrtc
333 #endif  // API_NETEQ_NETEQ_H_
334