1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "webrtc/modules/audio_coding/codecs/opus/opus_interface.h"
12 #include "webrtc/modules/audio_coding/codecs/opus/opus_inst.h"
13 
14 #include <assert.h>
15 #include <stdlib.h>
16 #include <string.h>
17 
/* Frame-size limits and DTX tuning constants used by the encoder and decoder
 * wrappers in this file. */
enum {
  /* Longest frame, in milliseconds, that the encoder wrapper accepts. */
  kWebRtcOpusMaxEncodeFrameSizeMs = 60,

  /* Longest frame, in milliseconds, that the decoder must cope with. The Opus
   * format permits frames of up to 120 ms and the sending side is outside our
   * control, even though NetEq is currently limited to 60 ms when
   * receiving. */
  kWebRtcOpusMaxDecodeFrameSizeMs = 120,

  /* Upper bound on samples per channel in one frame: 48 samples per
   * millisecond (48 kHz) times the longest decodable frame. */
  kWebRtcOpusMaxFrameSizePerChannel = 48 * kWebRtcOpusMaxDecodeFrameSizeMs,

  /* Default frame size: 20 ms at 48 kHz, in samples for one channel. */
  kWebRtcOpusDefaultFrameSize = 960,

  /* Number of consecutive zero samples at or beyond which DTX can fail. */
  kZeroBreakCount = 157,

  /* Sample value written to interrupt a run of zeros. It depends on whether
   * OPUS_FIXED_POINT is defined. */
#ifdef OPUS_FIXED_POINT
  kZeroBreakValue = 10,
#else
  kZeroBreakValue = 1,
#endif
};
43 
/* Creates an Opus encoder instance operating at 48 kHz.
 *
 * inst        - receives the newly allocated instance on success; it is left
 *               untouched on failure.
 * channels    - number of input channels; must be 1 or 2.
 * application - 0 selects OPUS_APPLICATION_VOIP, 1 selects
 *               OPUS_APPLICATION_AUDIO; any other value is rejected.
 *
 * Returns 0 on success, or -1 on invalid arguments, allocation failure, or
 * when Opus fails to create the encoder. A successfully created instance is
 * released with WebRtcOpus_EncoderFree(). */
int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                 size_t channels,
                                 int32_t application) {
  int opus_app;
  if (!inst)
    return -1;

  /* Opus encodes mono or stereo only. Rejecting everything else up front also
   * prevents the (int) cast below from truncating an oversized value into a
   * seemingly valid channel count while the untruncated value is stored. */
  if (channels != 1 && channels != 2)
    return -1;

  switch (application) {
    case 0:
      opus_app = OPUS_APPLICATION_VOIP;
      break;
    case 1:
      opus_app = OPUS_APPLICATION_AUDIO;
      break;
    default:
      return -1;
  }

  /* Report allocation failures to the caller instead of relying on assert(),
   * which is compiled out when NDEBUG is defined. */
  OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
  if (!state)
    return -1;

  /* One zero-run counter per channel, all starting at zero. */
  state->zero_counts = calloc(channels, sizeof(size_t));
  if (!state->zero_counts) {
    free(state);
    return -1;
  }

  int error = OPUS_OK;
  state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
                                       &error);
  if (error != OPUS_OK || !state->encoder) {
    /* Releases the counters and the state along with the encoder handle. */
    WebRtcOpus_EncoderFree(state);
    return -1;
  }

  state->in_dtx_mode = 0;
  state->channels = channels;

  *inst = state;
  return 0;
}
83 
/* Destroys the Opus encoder owned by |inst| and frees the instance together
 * with its per-channel zero counters.
 * Returns 0 on success, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_encoder_destroy(inst->encoder);
  free(inst->zero_counts);
  free(inst);
  return 0;
}
94 
/* Encodes one frame of interleaved 16-bit audio with Opus.
 *
 * inst                  - encoder instance; NULL is rejected.
 * audio_in              - |samples| samples per channel, interleaved.
 * samples               - samples per channel; at most 48 samples per
 *                         millisecond times kWebRtcOpusMaxEncodeFrameSizeMs.
 * length_encoded_buffer - capacity of |encoded| in bytes.
 * encoded               - receives the encoded payload.
 *
 * Returns the payload size in bytes when it is larger than one byte. A
 * one-byte result from Opus signals DTX: the first such packet is reported
 * as 1 so it can be transmitted, and subsequent ones are reported as 0
 * (nothing to send). Returns -1 on invalid arguments or when Opus reports an
 * error or an empty payload. */
int WebRtcOpus_Encode(OpusEncInst* inst,
                      const int16_t* audio_in,
                      size_t samples,
                      size_t length_encoded_buffer,
                      uint8_t* encoded) {
  int res;
  size_t i;
  size_t c;

  /* Scratch copy of the input, sized for two channels of the longest
   * encodable frame. */
  int16_t buffer[2 * 48 * kWebRtcOpusMaxEncodeFrameSizeMs];

  /* Match the other encoder entry points, which report a NULL instance as an
   * error instead of dereferencing it. */
  if (!inst) {
    return -1;
  }

  if (samples > 48 * kWebRtcOpusMaxEncodeFrameSizeMs) {
    return -1;
  }

  const size_t channels = inst->channels;

  /* |buffer| only has room for two channels; refuse anything that would
   * overrun it in the copy below. */
  if (channels > 2) {
    return -1;
  }

  int use_buffer = 0;

  // While in DTX mode, break long runs of zeros by writing |kZeroBreakValue|
  // once a channel has accumulated |kZeroBreakCount| consecutive zero samples.
  // The counters live in |inst| and therefore carry over between calls.
  if (inst->in_dtx_mode) {
    for (i = 0; i < samples; ++i) {
      for (c = 0; c < channels; ++c) {
        if (audio_in[i * channels + c] == 0) {
          ++inst->zero_counts[c];
          if (inst->zero_counts[c] == kZeroBreakCount) {
            // The caller's buffer is const, so copy it lazily the first time
            // a sample has to be modified.
            if (!use_buffer) {
              memcpy(buffer, audio_in, samples * channels * sizeof(int16_t));
              use_buffer = 1;
            }
            buffer[i * channels + c] = kZeroBreakValue;
            inst->zero_counts[c] = 0;
          }
        } else {
          inst->zero_counts[c] = 0;
        }
      }
    }
  }

  res = opus_encode(inst->encoder,
                    use_buffer ? buffer : audio_in,
                    (int)samples,
                    encoded,
                    (opus_int32)length_encoded_buffer);

  if (res == 1) {
    // Indicates DTX since the packet has nothing but a header. In principle,
    // there is no need to send this packet. However, we do transmit the first
    // occurrence to let the decoder know that the encoder enters DTX mode.
    if (inst->in_dtx_mode) {
      return 0;
    } else {
      inst->in_dtx_mode = 1;
      return 1;
    }
  } else if (res > 1) {
    inst->in_dtx_mode = 0;
    return res;
  }

  // Zero or negative: treated as an encoding failure.
  return -1;
}
158 
/* Sets the target bitrate of the encoder to |rate| via OPUS_SET_BITRATE.
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
}
166 
/* Passes |loss_rate| to the encoder via OPUS_SET_PACKET_LOSS_PERC.
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_PACKET_LOSS_PERC(loss_rate));
}
175 
/* Limits the encoder's maximum bandwidth according to |frequency_hz|.
 * The thresholds map to bandwidths as follows:
 *   <= 8000  -> OPUS_BANDWIDTH_NARROWBAND
 *   <= 12000 -> OPUS_BANDWIDTH_MEDIUMBAND
 *   <= 16000 -> OPUS_BANDWIDTH_WIDEBAND
 *   <= 24000 -> OPUS_BANDWIDTH_SUPERWIDEBAND
 *   otherwise   OPUS_BANDWIDTH_FULLBAND
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_SetMaxPlaybackRate(OpusEncInst* inst, int32_t frequency_hz) {
  if (!inst) {
    return -1;
  }

  /* Start from the widest setting and narrow it step by step; the last
   * threshold that matches wins, which is the lowest applicable band. */
  opus_int32 max_bandwidth = OPUS_BANDWIDTH_FULLBAND;
  if (frequency_hz <= 24000) {
    max_bandwidth = OPUS_BANDWIDTH_SUPERWIDEBAND;
  }
  if (frequency_hz <= 16000) {
    max_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
  }
  if (frequency_hz <= 12000) {
    max_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
  }
  if (frequency_hz <= 8000) {
    max_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
  }

  return opus_encoder_ctl(inst->encoder,
                          OPUS_SET_MAX_BANDWIDTH(max_bandwidth));
}
196 
/* Turns on in-band forward error correction via OPUS_SET_INBAND_FEC(1).
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
}
204 
/* Turns off in-band forward error correction via OPUS_SET_INBAND_FEC(0).
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
}
212 
/* Enables discontinuous transmission (DTX) on the encoder.
 * Returns -1 when |inst| is NULL, the error from forcing the signal type if
 * that step fails, and otherwise the result of OPUS_SET_DTX(1). */
int16_t WebRtcOpus_EnableDtx(OpusEncInst* inst) {
  if (inst) {
    // The signal type is forced to voice so that Opus does not enter
    // CELT-only mode; without this, DTX does not last long during pure
    // silence.
    // TODO(minyue): Remove the signal type forcing when Opus DTX works
    // properly without it.
    const int signal_status =
        opus_encoder_ctl(inst->encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
    if (signal_status == OPUS_OK) {
      return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
    }
    return signal_status;
  }
  return -1;
}
230 
/* Disables discontinuous transmission (DTX) on the encoder and puts the
 * signal type back to automatic selection.
 * Returns -1 when |inst| is NULL, the error from resetting the signal type if
 * that step fails, and otherwise the result of OPUS_SET_DTX(0). */
int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
  if (!inst) {
    return -1;
  }

  const int signal_status =
      opus_encoder_ctl(inst->encoder, OPUS_SET_SIGNAL(OPUS_AUTO));
  if (signal_status != OPUS_OK) {
    return signal_status;
  }

  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
}
242 
/* Passes |complexity| to the encoder via OPUS_SET_COMPLEXITY.
 * Returns the result of the Opus control call, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
  if (!inst) {
    return -1;
  }
  return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
}
250 
/* Creates an Opus decoder instance that always outputs at 48 kHz.
 *
 * inst     - receives the newly allocated instance on success; it is left
 *            untouched on failure.
 * channels - number of output channels; must be 1 or 2.
 *
 * Returns 0 on success, or -1 on invalid arguments, allocation failure, or
 * when Opus fails to create the decoder. A successfully created instance is
 * released with WebRtcOpus_DecoderFree(). */
int16_t WebRtcOpus_DecoderCreate(OpusDecInst** inst, size_t channels) {
  if (inst == NULL) {
    return -1;
  }

  /* Opus decodes to mono or stereo only. Rejecting everything else up front
   * also prevents the (int) cast below from truncating an oversized value
   * into a seemingly valid channel count while the untruncated value is
   * stored in the state. */
  if (channels != 1 && channels != 2) {
    return -1;
  }

  /* Create Opus decoder state. */
  OpusDecInst* state = calloc(1, sizeof(OpusDecInst));
  if (state == NULL) {
    return -1;
  }

  /* Create new memory, always at 48000 Hz. */
  int error = OPUS_OK;
  state->decoder = opus_decoder_create(48000, (int)channels, &error);
  if (error != OPUS_OK || state->decoder == NULL) {
    /* Creation failed: release whatever was obtained, then the state. */
    if (state->decoder) {
      opus_decoder_destroy(state->decoder);
    }
    free(state);
    return -1;
  }

  state->channels = channels;
  /* Until a packet has been decoded, concealment assumes the default frame
   * size. */
  state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
  state->in_dtx_mode = 0;
  *inst = state;
  return 0;
}
281 
/* Destroys the Opus decoder owned by |inst| and frees the instance.
 * Returns 0 on success, or -1 when |inst| is NULL. */
int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
  if (!inst) {
    return -1;
  }

  opus_decoder_destroy(inst->decoder);
  free(inst);
  return 0;
}
291 
/* Returns the channel count stored in |inst|. |inst| is dereferenced without
 * a NULL check, so it must be a valid instance. */
size_t WebRtcOpus_DecoderChannels(OpusDecInst* inst) {
  const size_t channel_count = inst->channels;
  return channel_count;
}
295 
/* Re-initializes the decoder: clears the wrapper's DTX flag and issues
 * OPUS_RESET_STATE to the underlying Opus decoder. |inst| is dereferenced
 * without a NULL check, so it must be a valid instance. */
void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
  inst->in_dtx_mode = 0;
  opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
}
300 
301 /* For decoder to determine if it is to output speech or comfort noise. */
DetermineAudioType(OpusDecInst * inst,size_t encoded_bytes)302 static int16_t DetermineAudioType(OpusDecInst* inst, size_t encoded_bytes) {
303   // Audio type becomes comfort noise if |encoded_byte| is 1 and keeps
304   // to be so if the following |encoded_byte| are 0 or 1.
305   if (encoded_bytes == 0 && inst->in_dtx_mode) {
306     return 2;  // Comfort noise.
307   } else if (encoded_bytes == 1) {
308     inst->in_dtx_mode = 1;
309     return 2;  // Comfort noise.
310   } else {
311     inst->in_dtx_mode = 0;
312     return 0;  // Speech.
313   }
314 }
315 
316 /* |frame_size| is set to maximum Opus frame size in the normal case, and
317  * is set to the number of samples needed for PLC in case of losses.
318  * It is up to the caller to make sure the value is correct. */
DecodeNative(OpusDecInst * inst,const uint8_t * encoded,size_t encoded_bytes,int frame_size,int16_t * decoded,int16_t * audio_type,int decode_fec)319 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
320                         size_t encoded_bytes, int frame_size,
321                         int16_t* decoded, int16_t* audio_type, int decode_fec) {
322   int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
323                         (opus_int16*)decoded, frame_size, decode_fec);
324 
325   if (res <= 0)
326     return -1;
327 
328   *audio_type = DetermineAudioType(inst, encoded_bytes);
329 
330   return res;
331 }
332 
/* Decodes one Opus packet into |decoded|.
 *
 * An empty payload (|encoded_bytes| == 0) is not passed to the regular decode
 * path: the audio type is derived from the current DTX state and one frame of
 * packet-loss concealment is generated instead.
 *
 * Returns the sample count reported by the decoder, which is also remembered
 * in |inst| for later concealment. Returns -1 on failure. */
int WebRtcOpus_Decode(OpusDecInst* inst, const uint8_t* encoded,
                      size_t encoded_bytes, int16_t* decoded,
                      int16_t* audio_type) {
  int sample_count;

  if (encoded_bytes != 0) {
    sample_count = DecodeNative(inst, encoded, encoded_bytes,
                                kWebRtcOpusMaxFrameSizePerChannel, decoded,
                                audio_type, 0);
  } else {
    *audio_type = DetermineAudioType(inst, encoded_bytes);
    sample_count = WebRtcOpus_DecodePlc(inst, decoded, 1);
  }

  if (sample_count < 0) {
    return -1;
  }

  /* Remember the frame length; the PLC uses it to size concealment output. */
  inst->prev_decoded_samples = sample_count;
  return sample_count;
}
359 
/* Generates packet-loss concealment audio into |decoded|.
 *
 * The number of samples requested is |number_of_lost_frames| times the
 * previously decoded frame length stored in |inst|, limited to
 * |kWebRtcOpusMaxFrameSizePerChannel|.
 *
 * Returns the sample count reported by the decoder, or -1 on failure. */
int WebRtcOpus_DecodePlc(OpusDecInst* inst, int16_t* decoded,
                         int number_of_lost_frames) {
  int16_t audio_type = 0;
  const int frame_samples = inst->prev_decoded_samples;
  int plc_samples;

  /* A non-positive request cannot produce any samples; report it as an error
   * without involving the decoder. This also keeps the division below well
   * defined. */
  if (number_of_lost_frames <= 0 || frame_samples <= 0) {
    return -1;
  }

  /* Apply the limit before multiplying so that a large
   * |number_of_lost_frames| cannot overflow the signed product. */
  if (number_of_lost_frames >
      kWebRtcOpusMaxFrameSizePerChannel / frame_samples) {
    plc_samples = kWebRtcOpusMaxFrameSizePerChannel;
  } else {
    plc_samples = number_of_lost_frames * frame_samples;
  }

  /* A NULL payload of length zero asks the decoder for concealment. */
  const int decoded_samples = DecodeNative(inst, NULL, 0, plc_samples,
                                           decoded, &audio_type, 0);
  return (decoded_samples < 0) ? -1 : decoded_samples;
}
380 
/* Decodes the forward-error-correction data carried in an Opus packet.
 *
 * Returns 0 without decoding when WebRtcOpus_PacketHasFec() does not report
 * FEC for the packet. Otherwise decodes with the FEC flag set, using the
 * packet's samples-per-frame at 48 kHz as the frame size, and returns the
 * sample count reported by the decoder, or -1 on failure. */
int WebRtcOpus_DecodeFec(OpusDecInst* inst, const uint8_t* encoded,
                         size_t encoded_bytes, int16_t* decoded,
                         int16_t* audio_type) {
  if (WebRtcOpus_PacketHasFec(encoded, encoded_bytes) != 1) {
    return 0;
  }

  const int fec_samples = opus_packet_get_samples_per_frame(encoded, 48000);
  const int decoded_samples = DecodeNative(inst, encoded, encoded_bytes,
                                           fec_samples, decoded, audio_type,
                                           1);

  return (decoded_samples < 0) ? -1 : decoded_samples;
}
401 
/* Estimates the duration of a packet in samples at 48 kHz.
 *
 * For an empty payload the PLC duration is returned, because
 * WebRtcOpus_Decode() runs concealment in that case. Otherwise the duration
 * is the packet's frame count times its samples per frame.
 *
 * Returns 0 when the frame count cannot be determined or the duration falls
 * outside the range 120..5760 samples. */
int WebRtcOpus_DurationEst(OpusDecInst* inst,
                           const uint8_t* payload,
                           size_t payload_length_bytes) {
  if (payload_length_bytes == 0) {
    return WebRtcOpus_PlcDuration(inst);
  }

  const int frame_count =
      opus_packet_get_nb_frames(payload, (opus_int32)payload_length_bytes);
  if (frame_count < 0) {
    /* Invalid payload data. */
    return 0;
  }

  const int total_samples =
      frame_count * opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid =
      (total_samples >= 120) && (total_samples <= 5760);

  /* An out-of-range duration means the payload is invalid. */
  return duration_is_valid ? total_samples : 0;
}
424 
/* Returns the number of samples that concealment of a single frame would
 * produce: the previously decoded frame length stored in |inst|, limited to
 * |kWebRtcOpusMaxFrameSizePerChannel|. */
int WebRtcOpus_PlcDuration(OpusDecInst* inst) {
  const int previous_samples = inst->prev_decoded_samples;

  if (previous_samples > kWebRtcOpusMaxFrameSizePerChannel) {
    return kWebRtcOpusMaxFrameSizePerChannel;
  }
  return previous_samples;
}
433 
/* Estimates the duration, in samples at 48 kHz, of the FEC data in a packet.
 *
 * Returns 0 when WebRtcOpus_PacketHasFec() does not report FEC for the
 * packet, or when the packet's samples per frame fall outside the range
 * 480..5760. Otherwise returns the samples per frame. */
int WebRtcOpus_FecDurationEst(const uint8_t* payload,
                              size_t payload_length_bytes) {
  if (WebRtcOpus_PacketHasFec(payload, payload_length_bytes) != 1) {
    return 0;
  }

  const int frame_samples = opus_packet_get_samples_per_frame(payload, 48000);
  const int duration_is_valid =
      (frame_samples >= 480) && (frame_samples <= 5760);

  /* An out-of-range duration means the payload is invalid. */
  return duration_is_valid ? frame_samples : 0;
}
448 
WebRtcOpus_PacketHasFec(const uint8_t * payload,size_t payload_length_bytes)449 int WebRtcOpus_PacketHasFec(const uint8_t* payload,
450                             size_t payload_length_bytes) {
451   int frames, channels, payload_length_ms;
452   int n;
453   opus_int16 frame_sizes[48];
454   const unsigned char *frame_data[48];
455 
456   if (payload == NULL || payload_length_bytes == 0)
457     return 0;
458 
459   /* In CELT_ONLY mode, packets should not have FEC. */
460   if (payload[0] & 0x80)
461     return 0;
462 
463   payload_length_ms = opus_packet_get_samples_per_frame(payload, 48000) / 48;
464   if (10 > payload_length_ms)
465     payload_length_ms = 10;
466 
467   channels = opus_packet_get_nb_channels(payload);
468 
469   switch (payload_length_ms) {
470     case 10:
471     case 20: {
472       frames = 1;
473       break;
474     }
475     case 40: {
476       frames = 2;
477       break;
478     }
479     case 60: {
480       frames = 3;
481       break;
482     }
483     default: {
484       return 0; // It is actually even an invalid packet.
485     }
486   }
487 
488   /* The following is to parse the LBRR flags. */
489   if (opus_packet_parse(payload, (opus_int32)payload_length_bytes, NULL,
490                         frame_data, frame_sizes, NULL) < 0) {
491     return 0;
492   }
493 
494   if (frame_sizes[0] <= 1) {
495     return 0;
496   }
497 
498   for (n = 0; n < channels; n++) {
499     if (frame_data[0][0] & (0x80 >> ((n + 1) * (frames + 1) - 1)))
500       return 1;
501   }
502 
503   return 0;
504 }
505