/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/rtp_rtcp/source/rtp_sender_audio.h"

#include <string.h>

#include "webrtc/base/trace_event.h"
#include "webrtc/modules/rtp_rtcp/include/rtp_rtcp_defines.h"
#include "webrtc/modules/rtp_rtcp/source/byte_io.h"
#include "webrtc/system_wrappers/include/tick_util.h"

namespace webrtc {

static const int kDtmfFrequencyHz = 8000;

RTPSenderAudio::RTPSenderAudio(Clock* clock,
                               RTPSender* rtpSender,
                               RtpAudioFeedback* audio_feedback)
    : _clock(clock),
      _rtpSender(rtpSender),
      _audioFeedback(audio_feedback),
      _sendAudioCritsect(CriticalSectionWrapper::CreateCriticalSection()),
      _packetSizeSamples(160),
      _dtmfEventIsOn(false),
      _dtmfEventFirstPacketSent(false),
      _dtmfPayloadType(-1),
      _dtmfTimestamp(0),
      _dtmfKey(0),
      _dtmfLengthSamples(0),
      _dtmfLevel(0),
      _dtmfTimeLastSent(0),
      _dtmfTimestampLastSent(0),
      _REDPayloadType(-1),
      _inbandVADactive(false),
      _cngNBPayloadType(-1),
      _cngWBPayloadType(-1),
      _cngSWBPayloadType(-1),
      _cngFBPayloadType(-1),
      _lastPayloadType(-1),
      _audioLevel_dBov(0) {}

RTPSenderAudio::~RTPSenderAudio() {}

int RTPSenderAudio::AudioFrequency() const {
  return kDtmfFrequencyHz;
}

// set audio packet size, used to determine when it's time to send a DTMF packet
// in silence (CNG)
int32_t RTPSenderAudio::SetAudioPacketSize(uint16_t packetSizeSamples) {
  CriticalSectionScoped cs(_sendAudioCritsect.get());

  _packetSizeSamples = packetSizeSamples;
  return 0;
}
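
// For illustration (added note, not part of the original source; the numbers
// are assumptions chosen only to show the pacing): with 8 kHz audio and 20 ms
// packets, packetSizeSamples would be 160, so SendAudio() below emits a new
// DTMF packet only once the capture timestamp has advanced by at least 160
// samples since the last DTMF packet was sent.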

int32_t RTPSenderAudio::RegisterAudioPayload(
    const char payloadName[RTP_PAYLOAD_NAME_SIZE],
    const int8_t payloadType,
    const uint32_t frequency,
    const size_t channels,
    const uint32_t rate,
    RtpUtility::Payload** payload) {
  if (RtpUtility::StringCompare(payloadName, "cn", 2)) {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    //  we can have multiple CNG payload types
    switch (frequency) {
      case 8000:
        _cngNBPayloadType = payloadType;
        break;
      case 16000:
        _cngWBPayloadType = payloadType;
        break;
      case 32000:
        _cngSWBPayloadType = payloadType;
        break;
      case 48000:
        _cngFBPayloadType = payloadType;
        break;
      default:
        return -1;
    }
  } else if (RtpUtility::StringCompare(payloadName, "telephone-event", 15)) {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    // Don't add it to the list,
    // we don't want to allow sending with a DTMF payload type.
    _dtmfPayloadType = payloadType;
    return 0;
    // The default timestamp rate is 8000 Hz, but other rates may be defined.
  }
  *payload = new RtpUtility::Payload;
  (*payload)->typeSpecific.Audio.frequency = frequency;
  (*payload)->typeSpecific.Audio.channels = channels;
  (*payload)->typeSpecific.Audio.rate = rate;
  (*payload)->audio = true;
  (*payload)->name[RTP_PAYLOAD_NAME_SIZE - 1] = '\0';
  strncpy((*payload)->name, payloadName, RTP_PAYLOAD_NAME_SIZE - 1);
  return 0;
}
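
// Hedged usage sketch (added here, not in the original source; the payload
// type numbers and the sender_audio instance name are illustrative only):
// CNG is registered once per clock rate, while "telephone-event" only records
// the DTMF payload type without allocating a Payload entry.
//
//   RtpUtility::Payload* payload = nullptr;
//   sender_audio.RegisterAudioPayload("cn", 13, 8000, 1, 0, &payload);
//   sender_audio.RegisterAudioPayload("cn", 98, 16000, 1, 0, &payload);
//   sender_audio.RegisterAudioPayload("telephone-event", 126, 8000, 1, 0,
//                                     &payload);  // *payload is left unset.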

bool RTPSenderAudio::MarkerBit(FrameType frameType, int8_t payload_type) {
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  // for audio, true for the first packet in a speech burst
  bool markerBit = false;
  if (_lastPayloadType != payload_type) {
    if (payload_type != -1 && (_cngNBPayloadType == payload_type ||
                               _cngWBPayloadType == payload_type ||
                               _cngSWBPayloadType == payload_type ||
                               _cngFBPayloadType == payload_type)) {
      // Only set a marker bit when we change payload type to a non-CNG type.
      return false;
    }

    // payload_type differs
    if (_lastPayloadType == -1) {
      if (frameType != kAudioFrameCN) {
        // first packet and NOT CNG
        return true;
      } else {
        // first packet and CNG
        _inbandVADactive = true;
        return false;
      }
    }

    // not first packet AND
    // not CNG AND
    // payload_type changed

    // set a marker bit when we change payload type
    markerBit = true;
  }

  // For G.723, G.729, AMR etc. we can have inband VAD
  if (frameType == kAudioFrameCN) {
    _inbandVADactive = true;
  } else if (_inbandVADactive) {
    _inbandVADactive = false;
    markerBit = true;
  }
  return markerBit;
}
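
// Example of the resulting behaviour (added note, illustration only): for a
// stream that goes speech -> CNG -> speech, the very first speech packet gets
// the marker bit, the switch to CNG does not, and the first speech packet
// after the CNG period gets the marker bit again (the inband-VAD case above).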

int32_t RTPSenderAudio::SendAudio(FrameType frameType,
                                  int8_t payloadType,
                                  uint32_t captureTimeStamp,
                                  const uint8_t* payloadData,
                                  size_t dataSize,
                                  const RTPFragmentationHeader* fragmentation) {
  // TODO(pwestin) Break up this function into smaller functions.
  size_t payloadSize = dataSize;
  size_t maxPayloadLength = _rtpSender->MaxPayloadLength();
  bool dtmfToneStarted = false;
  uint16_t dtmfLengthMS = 0;
  uint8_t key = 0;
  int red_payload_type;
  uint8_t audio_level_dbov;
  int8_t dtmf_payload_type;
  uint16_t packet_size_samples;
  {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    red_payload_type = _REDPayloadType;
    audio_level_dbov = _audioLevel_dBov;
    dtmf_payload_type = _dtmfPayloadType;
    packet_size_samples = _packetSizeSamples;
  }

  // Check if we have pending DTMFs to send
  if (!_dtmfEventIsOn && PendingDTMF()) {
    int64_t delaySinceLastDTMF =
        _clock->TimeInMilliseconds() - _dtmfTimeLastSent;

    if (delaySinceLastDTMF > 100) {
      // New tone to play
      _dtmfTimestamp = captureTimeStamp;
      if (NextDTMF(&key, &dtmfLengthMS, &_dtmfLevel) >= 0) {
        _dtmfEventFirstPacketSent = false;
        _dtmfKey = key;
        _dtmfLengthSamples = (kDtmfFrequencyHz / 1000) * dtmfLengthMS;
        dtmfToneStarted = true;
        _dtmfEventIsOn = true;
      }
    }
  }
  if (dtmfToneStarted) {
    if (_audioFeedback)
      _audioFeedback->OnPlayTelephoneEvent(key, dtmfLengthMS, _dtmfLevel);
  }

  // A source MAY send events and coded audio packets for the same time,
  // but we don't support that.
  if (_dtmfEventIsOn) {
    if (frameType == kEmptyFrame) {
      // kEmptyFrame is used to drive the DTMF when in CN mode;
      // it can be triggered more frequently than we want to send the
      // DTMF packets.
      if (packet_size_samples > (captureTimeStamp - _dtmfTimestampLastSent)) {
        // not time to send yet
        return 0;
      }
    }
    _dtmfTimestampLastSent = captureTimeStamp;
    uint32_t dtmfDurationSamples = captureTimeStamp - _dtmfTimestamp;
    bool ended = false;
    bool send = true;

    if (_dtmfLengthSamples > dtmfDurationSamples) {
      if (dtmfDurationSamples <= 0) {
        // Skip sending a packet at the start, since we shouldn't use duration 0
        send = false;
      }
    } else {
      ended = true;
      _dtmfEventIsOn = false;
      _dtmfTimeLastSent = _clock->TimeInMilliseconds();
    }
    if (send) {
      if (dtmfDurationSamples > 0xffff) {
        // RFC 4733 2.5.2.3 Long-Duration Events
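        // Worked example (added note, illustrative numbers): a tone held for
        // 10 s at the 8 kHz event clock spans 80000 samples, which exceeds
        // the 16-bit duration field (max 65535). The first packet below
        // reports 65535, the event timestamp is then moved forward, and the
        // remaining 14465 samples are reported from the new segment.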
        SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                 static_cast<uint16_t>(0xffff), false);

        // set new timestamp for this segment
        _dtmfTimestamp = captureTimeStamp;
        dtmfDurationSamples -= 0xffff;
        _dtmfLengthSamples -= 0xffff;

        return SendTelephoneEventPacket(
            ended, dtmf_payload_type, _dtmfTimestamp,
            static_cast<uint16_t>(dtmfDurationSamples), false);
      } else {
        if (SendTelephoneEventPacket(ended, dtmf_payload_type, _dtmfTimestamp,
                                     static_cast<uint16_t>(dtmfDurationSamples),
                                     !_dtmfEventFirstPacketSent) != 0) {
          return -1;
        }
        _dtmfEventFirstPacketSent = true;
        return 0;
      }
    }
    return 0;
  }
  if (payloadSize == 0 || payloadData == NULL) {
    if (frameType == kEmptyFrame) {
      // we don't send empty audio RTP packets;
      // no error since we use them to drive DTMF when we use VAD
      return 0;
    }
    return -1;
  }
  uint8_t dataBuffer[IP_PACKET_SIZE];
  bool markerBit = MarkerBit(frameType, payloadType);

  int32_t rtpHeaderLength = 0;
  uint16_t timestampOffset = 0;

  if (red_payload_type >= 0 && fragmentation && !markerBit &&
      fragmentation->fragmentationVectorSize > 1) {
    // have we configured RED? use its payload type
    // we need to get the current timestamp to calculate the diff
    uint32_t oldTimeStamp = _rtpSender->Timestamp();
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, red_payload_type,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());

    timestampOffset = uint16_t(_rtpSender->Timestamp() - oldTimeStamp);
  } else {
    rtpHeaderLength = _rtpSender->BuildRTPheader(dataBuffer, payloadType,
                                                 markerBit, captureTimeStamp,
                                                 _clock->TimeInMilliseconds());
  }
  if (rtpHeaderLength <= 0) {
    return -1;
  }
  if (maxPayloadLength < (rtpHeaderLength + payloadSize)) {
    // Payload too large for the packet buffer.
    return -1;
  }
  if (red_payload_type >= 0 &&  // Have we configured RED?
      fragmentation && fragmentation->fragmentationVectorSize > 1 &&
      !markerBit) {
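    // For reference (summary added here, not part of the original source):
    // the RFC 2198 redundancy header written below has the layout
    //
    //  0                   1                   2                   3
    //  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    // |F|   block PT  |  timestamp offset         |   block length    |
    // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    //
    // followed by a final one-byte header (F = 0, primary payload type), the
    // redundant block (fragment 1) and then the primary block (fragment 0).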
    if (timestampOffset <= 0x3fff) {
      if (fragmentation->fragmentationVectorSize != 2) {
        // we only support 2 codecs when using RED
        return -1;
      }
      // F bit (0x80) is set since another block follows
      dataBuffer[rtpHeaderLength++] =
          0x80 + fragmentation->fragmentationPlType[1];
      size_t blockLength = fragmentation->fragmentationLength[1];

      // sanity check blockLength
      if (blockLength > 0x3ff) {  // block length 10 bits 1023 bytes
        return -1;
      }
      // Write the 14-bit timestamp offset and 10-bit block length as three
      // big-endian bytes.
      uint32_t REDheader = (timestampOffset << 10) + blockLength;
      ByteWriter<uint32_t, 3>::WriteBigEndian(dataBuffer + rtpHeaderLength,
                                              REDheader);
      rtpHeaderLength += 3;

      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      // copy the RED data
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[1],
             fragmentation->fragmentationLength[1]);

      // copy the normal data
      memcpy(
          dataBuffer + rtpHeaderLength + fragmentation->fragmentationLength[1],
          payloadData + fragmentation->fragmentationOffset[0],
          fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0] +
                    fragmentation->fragmentationLength[1];
    } else {
      // silence for too long; send only new data
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    }
  } else {
    if (fragmentation && fragmentation->fragmentationVectorSize > 0) {
      // use the fragment info if we have one
      dataBuffer[rtpHeaderLength++] = fragmentation->fragmentationPlType[0];
      memcpy(dataBuffer + rtpHeaderLength,
             payloadData + fragmentation->fragmentationOffset[0],
             fragmentation->fragmentationLength[0]);

      payloadSize = fragmentation->fragmentationLength[0];
    } else {
      memcpy(dataBuffer + rtpHeaderLength, payloadData, payloadSize);
    }
  }
  {
    CriticalSectionScoped cs(_sendAudioCritsect.get());
    _lastPayloadType = payloadType;
  }
  // Update audio level extension, if included.
  size_t packetSize = payloadSize + rtpHeaderLength;
  RtpUtility::RtpHeaderParser rtp_parser(dataBuffer, packetSize);
  RTPHeader rtp_header;
  rtp_parser.Parse(&rtp_header);
  _rtpSender->UpdateAudioLevel(dataBuffer, packetSize, rtp_header,
                               (frameType == kAudioFrameSpeech),
                               audio_level_dbov);
  TRACE_EVENT_ASYNC_END2("webrtc", "Audio", captureTimeStamp, "timestamp",
                         _rtpSender->Timestamp(), "seqnum",
                         _rtpSender->SequenceNumber());
  return _rtpSender->SendToNetwork(dataBuffer, payloadSize, rtpHeaderLength,
                                   TickTime::MillisecondTimestamp(),
                                   kAllowRetransmission,
                                   RtpPacketSender::kHighPriority);
}
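
// Hedged usage sketch (added note; the payload type, buffer names and
// timestamp step are illustrative assumptions): a caller passes the encoder
// output together with the capture timestamp, e.g. for 20 ms of 8 kHz PCMU
// the timestamp advances by 160 per call.
//
//   sender_audio.SendAudio(kAudioFrameSpeech, 0 /* PCMU */, rtp_timestamp,
//                          encoded_data, encoded_size, nullptr);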

// Audio level magnitude and voice activity flag are set for each RTP packet
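// (Added note, inferred from the 7-bit range check below and the
// client-to-mixer audio level extension in RFC 6464: level_dBov is the
// magnitude of the level in -dBov, so 0 means 0 dBov and 127 means -127 dBov,
// i.e. near silence.)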
int32_t RTPSenderAudio::SetAudioLevel(uint8_t level_dBov) {
  if (level_dBov > 127) {
    return -1;
  }
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  _audioLevel_dBov = level_dBov;
  return 0;
}

// Set payload type for Redundant Audio Data RFC 2198
int32_t RTPSenderAudio::SetRED(int8_t payloadType) {
  if (payloadType < -1) {
    return -1;
  }
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  _REDPayloadType = payloadType;
  return 0;
}

// Get payload type for Redundant Audio Data RFC 2198
int32_t RTPSenderAudio::RED(int8_t* payloadType) const {
  CriticalSectionScoped cs(_sendAudioCritsect.get());
  if (_REDPayloadType == -1) {
    // not configured
    return -1;
  }
  *payloadType = _REDPayloadType;
  return 0;
}

// Send a TelephoneEvent tone using RFC 2833 (4733)
int32_t RTPSenderAudio::SendTelephoneEvent(uint8_t key,
                                           uint16_t time_ms,
                                           uint8_t level) {
  {
    CriticalSectionScoped lock(_sendAudioCritsect.get());
    if (_dtmfPayloadType < 0) {
      // TelephoneEvent payload type not configured
      return -1;
    }
  }
  return AddDTMF(key, time_ms, level);
}
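
// Hedged usage sketch (added note; the event code, duration and level are
// illustrative): RFC 4733 maps events 0-9 to the digits 0-9, 10 to '*' and
// 11 to '#'.
//
//   // Queue DTMF digit "1" for 160 ms at level 10 (-10 dBm0):
//   sender_audio.SendTelephoneEvent(1, 160, 10);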

int32_t RTPSenderAudio::SendTelephoneEventPacket(bool ended,
                                                 int8_t dtmf_payload_type,
                                                 uint32_t dtmfTimeStamp,
                                                 uint16_t duration,
                                                 bool markerBit) {
  uint8_t dtmfbuffer[IP_PACKET_SIZE];
  uint8_t sendCount = 1;
  int32_t retVal = 0;

  if (ended) {
    // resend last packet in an event 3 times
    sendCount = 3;
  }
  do {
    // Send DTMF data
    _rtpSender->BuildRTPheader(dtmfbuffer, dtmf_payload_type, markerBit,
                               dtmfTimeStamp, _clock->TimeInMilliseconds());

    // reset CSRC and X bit
    dtmfbuffer[0] &= 0xe0;

    // Create DTMF data
    /*    From RFC 2833:

     0                   1                   2                   3
     0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    |     event     |E|R| volume    |          duration             |
    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    */
    // R bit always cleared
    uint8_t R = 0x00;
    uint8_t volume = _dtmfLevel;

    // End bit (E) is set only once the event has ended
    uint8_t E = ended ? 0x80 : 0x00;

    // First payload byte is the event number, which equals the key number
    dtmfbuffer[12] = _dtmfKey;
    dtmfbuffer[13] = E | R | volume;
    ByteWriter<uint16_t>::WriteBigEndian(dtmfbuffer + 14, duration);
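    // Worked example (added note, illustrative values): event 1 with the end
    // bit set, volume 10 and duration 800 samples encodes these four payload
    // bytes as 0x01 0x8A 0x03 0x20.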

    TRACE_EVENT_INSTANT2(TRACE_DISABLED_BY_DEFAULT("webrtc_rtp"),
                         "Audio::SendTelephoneEvent", "timestamp",
                         dtmfTimeStamp, "seqnum", _rtpSender->SequenceNumber());
    retVal = _rtpSender->SendToNetwork(
        dtmfbuffer, 4, 12, TickTime::MillisecondTimestamp(),
        kAllowRetransmission, RtpPacketSender::kHighPriority);
    sendCount--;
  } while (sendCount > 0 && retVal == 0);

  return retVal;
}
}  // namespace webrtc