1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/rtp_rtcp/source/rtp_receiver_audio.h"
12 
13 #include <assert.h>  // assert
14 #include <math.h>   // pow()
15 #include <string.h>  // memcpy()
16 
17 #include "webrtc/base/logging.h"
18 #include "webrtc/base/trace_event.h"
19 #include "webrtc/system_wrappers/include/critical_section_wrapper.h"
20 
21 namespace webrtc {
CreateAudioStrategy(RtpData * data_callback,RtpAudioFeedback * incoming_messages_callback)22 RTPReceiverStrategy* RTPReceiverStrategy::CreateAudioStrategy(
23     RtpData* data_callback,
24     RtpAudioFeedback* incoming_messages_callback) {
25   return new RTPReceiverAudio(data_callback, incoming_messages_callback);
26 }
27 
RTPReceiverAudio(RtpData * data_callback,RtpAudioFeedback * incoming_messages_callback)28 RTPReceiverAudio::RTPReceiverAudio(RtpData* data_callback,
29                                    RtpAudioFeedback* incoming_messages_callback)
30     : RTPReceiverStrategy(data_callback),
31       TelephoneEventHandler(),
32       last_received_frequency_(8000),
33       telephone_event_forward_to_decoder_(false),
34       telephone_event_payload_type_(-1),
35       cng_nb_payload_type_(-1),
36       cng_wb_payload_type_(-1),
37       cng_swb_payload_type_(-1),
38       cng_fb_payload_type_(-1),
39       cng_payload_type_(-1),
40       g722_payload_type_(-1),
41       last_received_g722_(false),
42       num_energy_(0),
43       current_remote_energy_(),
44       cb_audio_feedback_(incoming_messages_callback) {
45   last_payload_.Audio.channels = 1;
46   memset(current_remote_energy_, 0, sizeof(current_remote_energy_));
47 }
48 
49 // Outband TelephoneEvent(DTMF) detection
SetTelephoneEventForwardToDecoder(bool forward_to_decoder)50 void RTPReceiverAudio::SetTelephoneEventForwardToDecoder(
51     bool forward_to_decoder) {
52   CriticalSectionScoped lock(crit_sect_.get());
53   telephone_event_forward_to_decoder_ = forward_to_decoder;
54 }
55 
56 // Is forwarding of outband telephone events turned on/off?
TelephoneEventForwardToDecoder() const57 bool RTPReceiverAudio::TelephoneEventForwardToDecoder() const {
58   CriticalSectionScoped lock(crit_sect_.get());
59   return telephone_event_forward_to_decoder_;
60 }
61 
TelephoneEventPayloadType(int8_t payload_type) const62 bool RTPReceiverAudio::TelephoneEventPayloadType(
63     int8_t payload_type) const {
64   CriticalSectionScoped lock(crit_sect_.get());
65   return telephone_event_payload_type_ == payload_type;
66 }
67 
CNGPayloadType(int8_t payload_type,uint32_t * frequency,bool * cng_payload_type_has_changed)68 bool RTPReceiverAudio::CNGPayloadType(int8_t payload_type,
69                                       uint32_t* frequency,
70                                       bool* cng_payload_type_has_changed) {
71   CriticalSectionScoped lock(crit_sect_.get());
72   *cng_payload_type_has_changed = false;
73 
74   //  We can have four CNG on 8000Hz, 16000Hz, 32000Hz and 48000Hz.
75   if (cng_nb_payload_type_ == payload_type) {
76     *frequency = 8000;
77     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_nb_payload_type_)
78       *cng_payload_type_has_changed = true;
79 
80     cng_payload_type_ = cng_nb_payload_type_;
81     return true;
82   } else if (cng_wb_payload_type_ == payload_type) {
83     // if last received codec is G.722 we must use frequency 8000
84     if (last_received_g722_) {
85       *frequency = 8000;
86     } else {
87       *frequency = 16000;
88     }
89     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_wb_payload_type_)
90       *cng_payload_type_has_changed = true;
91     cng_payload_type_ = cng_wb_payload_type_;
92     return true;
93   } else if (cng_swb_payload_type_ == payload_type) {
94     *frequency = 32000;
95     if ((cng_payload_type_ != -1) &&
96         (cng_payload_type_ != cng_swb_payload_type_))
97       *cng_payload_type_has_changed = true;
98     cng_payload_type_ = cng_swb_payload_type_;
99     return true;
100   } else if (cng_fb_payload_type_ == payload_type) {
101     *frequency = 48000;
102     if (cng_payload_type_ != -1 && cng_payload_type_ != cng_fb_payload_type_)
103       *cng_payload_type_has_changed = true;
104     cng_payload_type_ = cng_fb_payload_type_;
105     return true;
106   } else {
107     //  not CNG
108     if (g722_payload_type_ == payload_type) {
109       last_received_g722_ = true;
110     } else {
111       last_received_g722_ = false;
112     }
113   }
114   return false;
115 }
116 
ShouldReportCsrcChanges(uint8_t payload_type) const117 bool RTPReceiverAudio::ShouldReportCsrcChanges(uint8_t payload_type) const {
118   // Don't do this for DTMF packets, otherwise it's fine.
119   return !TelephoneEventPayloadType(payload_type);
120 }
121 
122 // -   Sample based or frame based codecs based on RFC 3551
123 // -
124 // -   NOTE! There is one error in the RFC, stating G.722 uses 8 bits/samples.
125 // -   The correct rate is 4 bits/sample.
126 // -
127 // -   name of                              sampling              default
128 // -   encoding  sample/frame  bits/sample      rate  ms/frame  ms/packet
129 // -
130 // -   Sample based audio codecs
131 // -   DVI4      sample        4                var.                   20
132 // -   G722      sample        4              16,000                   20
133 // -   G726-40   sample        5               8,000                   20
134 // -   G726-32   sample        4               8,000                   20
135 // -   G726-24   sample        3               8,000                   20
136 // -   G726-16   sample        2               8,000                   20
137 // -   L8        sample        8                var.                   20
138 // -   L16       sample        16               var.                   20
139 // -   PCMA      sample        8                var.                   20
140 // -   PCMU      sample        8                var.                   20
141 // -
142 // -   Frame based audio codecs
143 // -   G723      frame         N/A             8,000        30         30
144 // -   G728      frame         N/A             8,000       2.5         20
145 // -   G729      frame         N/A             8,000        10         20
146 // -   G729D     frame         N/A             8,000        10         20
147 // -   G729E     frame         N/A             8,000        10         20
148 // -   GSM       frame         N/A             8,000        20         20
149 // -   GSM-EFR   frame         N/A             8,000        20         20
150 // -   LPC       frame         N/A             8,000        20         20
151 // -   MPA       frame         N/A              var.      var.
152 // -
153 // -   G7221     frame         N/A
OnNewPayloadTypeCreated(const char payload_name[RTP_PAYLOAD_NAME_SIZE],int8_t payload_type,uint32_t frequency)154 int32_t RTPReceiverAudio::OnNewPayloadTypeCreated(
155     const char payload_name[RTP_PAYLOAD_NAME_SIZE],
156     int8_t payload_type,
157     uint32_t frequency) {
158   CriticalSectionScoped lock(crit_sect_.get());
159 
160   if (RtpUtility::StringCompare(payload_name, "telephone-event", 15)) {
161     telephone_event_payload_type_ = payload_type;
162   }
163   if (RtpUtility::StringCompare(payload_name, "cn", 2)) {
164     //  we can have three CNG on 8000Hz, 16000Hz and 32000Hz
165     if (frequency == 8000) {
166       cng_nb_payload_type_ = payload_type;
167     } else if (frequency == 16000) {
168       cng_wb_payload_type_ = payload_type;
169     } else if (frequency == 32000) {
170       cng_swb_payload_type_ = payload_type;
171     } else if (frequency == 48000) {
172       cng_fb_payload_type_ = payload_type;
173     } else {
174       assert(false);
175       return -1;
176     }
177   }
178   return 0;
179 }
180 
ParseRtpPacket(WebRtcRTPHeader * rtp_header,const PayloadUnion & specific_payload,bool is_red,const uint8_t * payload,size_t payload_length,int64_t timestamp_ms,bool is_first_packet)181 int32_t RTPReceiverAudio::ParseRtpPacket(WebRtcRTPHeader* rtp_header,
182                                          const PayloadUnion& specific_payload,
183                                          bool is_red,
184                                          const uint8_t* payload,
185                                          size_t payload_length,
186                                          int64_t timestamp_ms,
187                                          bool is_first_packet) {
188   TRACE_EVENT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"), "Audio::ParseRtp",
189                "seqnum", rtp_header->header.sequenceNumber, "timestamp",
190                rtp_header->header.timestamp);
191   rtp_header->type.Audio.numEnergy = rtp_header->header.numCSRCs;
192   num_energy_ = rtp_header->type.Audio.numEnergy;
193   if (rtp_header->type.Audio.numEnergy > 0 &&
194       rtp_header->type.Audio.numEnergy <= kRtpCsrcSize) {
195     memcpy(current_remote_energy_,
196            rtp_header->type.Audio.arrOfEnergy,
197            rtp_header->type.Audio.numEnergy);
198   }
199 
200   return ParseAudioCodecSpecific(rtp_header,
201                                  payload,
202                                  payload_length,
203                                  specific_payload.Audio,
204                                  is_red);
205 }
206 
GetPayloadTypeFrequency() const207 int RTPReceiverAudio::GetPayloadTypeFrequency() const {
208   CriticalSectionScoped lock(crit_sect_.get());
209   if (last_received_g722_) {
210     return 8000;
211   }
212   return last_received_frequency_;
213 }
214 
ProcessDeadOrAlive(uint16_t last_payload_length) const215 RTPAliveType RTPReceiverAudio::ProcessDeadOrAlive(
216     uint16_t last_payload_length) const {
217 
218   // Our CNG is 9 bytes; if it's a likely CNG the receiver needs to check
219   // kRtpNoRtp against NetEq speech_type kOutputPLCtoCNG.
220   if (last_payload_length < 10) {  // our CNG is 9 bytes
221     return kRtpNoRtp;
222   } else {
223     return kRtpDead;
224   }
225 }
226 
CheckPayloadChanged(int8_t payload_type,PayloadUnion * specific_payload,bool * should_discard_changes)227 void RTPReceiverAudio::CheckPayloadChanged(int8_t payload_type,
228                                            PayloadUnion* specific_payload,
229                                            bool* should_discard_changes) {
230   *should_discard_changes = false;
231 
232   if (TelephoneEventPayloadType(payload_type)) {
233     // Don't do callbacks for DTMF packets.
234     *should_discard_changes = true;
235     return;
236   }
237   // frequency is updated for CNG
238   bool cng_payload_type_has_changed = false;
239   bool is_cng_payload_type = CNGPayloadType(payload_type,
240                                             &specific_payload->Audio.frequency,
241                                             &cng_payload_type_has_changed);
242 
243   if (is_cng_payload_type) {
244     // Don't do callbacks for DTMF packets.
245     *should_discard_changes = true;
246     return;
247   }
248 }
249 
Energy(uint8_t array_of_energy[kRtpCsrcSize]) const250 int RTPReceiverAudio::Energy(uint8_t array_of_energy[kRtpCsrcSize]) const {
251   CriticalSectionScoped cs(crit_sect_.get());
252 
253   assert(num_energy_ <= kRtpCsrcSize);
254 
255   if (num_energy_ > 0) {
256     memcpy(array_of_energy, current_remote_energy_,
257            sizeof(uint8_t) * num_energy_);
258   }
259   return num_energy_;
260 }
261 
InvokeOnInitializeDecoder(RtpFeedback * callback,int8_t payload_type,const char payload_name[RTP_PAYLOAD_NAME_SIZE],const PayloadUnion & specific_payload) const262 int32_t RTPReceiverAudio::InvokeOnInitializeDecoder(
263     RtpFeedback* callback,
264     int8_t payload_type,
265     const char payload_name[RTP_PAYLOAD_NAME_SIZE],
266     const PayloadUnion& specific_payload) const {
267   if (-1 ==
268       callback->OnInitializeDecoder(
269           payload_type, payload_name, specific_payload.Audio.frequency,
270           specific_payload.Audio.channels, specific_payload.Audio.rate)) {
271     LOG(LS_ERROR) << "Failed to create decoder for payload type: "
272                   << payload_name << "/" << static_cast<int>(payload_type);
273     return -1;
274   }
275   return 0;
276 }
277 
278 // We are not allowed to have any critsects when calling data_callback.
ParseAudioCodecSpecific(WebRtcRTPHeader * rtp_header,const uint8_t * payload_data,size_t payload_length,const AudioPayload & audio_specific,bool is_red)279 int32_t RTPReceiverAudio::ParseAudioCodecSpecific(
280     WebRtcRTPHeader* rtp_header,
281     const uint8_t* payload_data,
282     size_t payload_length,
283     const AudioPayload& audio_specific,
284     bool is_red) {
285 
286   if (payload_length == 0) {
287     return 0;
288   }
289 
290   bool telephone_event_packet =
291       TelephoneEventPayloadType(rtp_header->header.payloadType);
292   if (telephone_event_packet) {
293     CriticalSectionScoped lock(crit_sect_.get());
294 
295     // RFC 4733 2.3
296     // 0                   1                   2                   3
297     // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
298     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
299     // |     event     |E|R| volume    |          duration             |
300     // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
301     //
302     if (payload_length % 4 != 0) {
303       return -1;
304     }
305     size_t number_of_events = payload_length / 4;
306 
307     // sanity
308     if (number_of_events >= MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS) {
309       number_of_events = MAX_NUMBER_OF_PARALLEL_TELEPHONE_EVENTS;
310     }
311     for (size_t n = 0; n < number_of_events; ++n) {
312       bool end = (payload_data[(4 * n) + 1] & 0x80) ? true : false;
313 
314       std::set<uint8_t>::iterator event =
315           telephone_event_reported_.find(payload_data[4 * n]);
316 
317       if (event != telephone_event_reported_.end()) {
318         // we have already seen this event
319         if (end) {
320           telephone_event_reported_.erase(payload_data[4 * n]);
321         }
322       } else {
323         if (end) {
324           // don't add if it's a end of a tone
325         } else {
326           telephone_event_reported_.insert(payload_data[4 * n]);
327         }
328       }
329     }
330 
331     // RFC 4733 2.5.1.3 & 2.5.2.3 Long-Duration Events
332     // should not be a problem since we don't care about the duration
333 
334     // RFC 4733 See 2.5.1.5. & 2.5.2.4.  Multiple Events in a Packet
335   }
336 
337   {
338     CriticalSectionScoped lock(crit_sect_.get());
339 
340     if (!telephone_event_packet) {
341       last_received_frequency_ = audio_specific.frequency;
342     }
343 
344     // Check if this is a CNG packet, receiver might want to know
345     uint32_t ignored;
346     bool also_ignored;
347     if (CNGPayloadType(rtp_header->header.payloadType,
348                        &ignored,
349                        &also_ignored)) {
350       rtp_header->type.Audio.isCNG = true;
351       rtp_header->frameType = kAudioFrameCN;
352     } else {
353       rtp_header->frameType = kAudioFrameSpeech;
354       rtp_header->type.Audio.isCNG = false;
355     }
356 
357     // check if it's a DTMF event, hence something we can playout
358     if (telephone_event_packet) {
359       if (!telephone_event_forward_to_decoder_) {
360         // don't forward event to decoder
361         return 0;
362       }
363       std::set<uint8_t>::iterator first =
364           telephone_event_reported_.begin();
365       if (first != telephone_event_reported_.end() && *first > 15) {
366         // don't forward non DTMF events
367         return 0;
368       }
369     }
370   }
371   // TODO(holmer): Break this out to have RED parsing handled generically.
372   if (is_red && !(payload_data[0] & 0x80)) {
373     // we recive only one frame packed in a RED packet remove the RED wrapper
374     rtp_header->header.payloadType = payload_data[0];
375 
376     // only one frame in the RED strip the one byte to help NetEq
377     return data_callback_->OnReceivedPayloadData(
378         payload_data + 1, payload_length - 1, rtp_header);
379   }
380 
381   rtp_header->type.Audio.channel = audio_specific.channels;
382   return data_callback_->OnReceivedPayloadData(
383       payload_data, payload_length, rtp_header);
384 }
385 }  // namespace webrtc
386