1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "audio/utility/audio_frame_operations.h"
12 
13 #include <string.h>
14 
15 #include <algorithm>
16 #include <cstdint>
17 #include <utility>
18 
19 #include "common_audio/include/audio_util.h"
20 #include "rtc_base/checks.h"
21 #include "rtc_base/numerics/safe_conversions.h"
22 
23 namespace webrtc {
namespace {

// Maximum number of samples (per channel) over which a mute/unmute transition
// is faded. This corresponds to 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
constexpr size_t kMuteFadeFrames = 128;

// Gain step applied per sample so that a fade spanning kMuteFadeFrames samples
// covers the whole range between silence and unity gain.
constexpr float kMuteFadeInc = 1.0f / static_cast<float>(kMuteFadeFrames);

}  // namespace
31 
Add(const AudioFrame & frame_to_add,AudioFrame * result_frame)32 void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
33                                AudioFrame* result_frame) {
34   // Sanity check.
35   RTC_DCHECK(result_frame);
36   RTC_DCHECK_GT(result_frame->num_channels_, 0);
37   RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
38 
39   bool no_previous_data = result_frame->muted();
40   if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
41     // Special case we have no data to start with.
42     RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
43     result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
44     no_previous_data = true;
45   }
46 
47   if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
48       frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
49     result_frame->vad_activity_ = AudioFrame::kVadActive;
50   } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
51              frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
52     result_frame->vad_activity_ = AudioFrame::kVadUnknown;
53   }
54 
55   if (result_frame->speech_type_ != frame_to_add.speech_type_)
56     result_frame->speech_type_ = AudioFrame::kUndefined;
57 
58   if (!frame_to_add.muted()) {
59     const int16_t* in_data = frame_to_add.data();
60     int16_t* out_data = result_frame->mutable_data();
61     size_t length =
62         frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
63     if (no_previous_data) {
64       std::copy(in_data, in_data + length, out_data);
65     } else {
66       for (size_t i = 0; i < length; i++) {
67         const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
68                                    static_cast<int32_t>(in_data[i]);
69         out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
70       }
71     }
72   }
73 }
74 
MonoToStereo(AudioFrame * frame)75 int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
76   if (frame->num_channels_ != 1) {
77     return -1;
78   }
79   UpmixChannels(2, frame);
80   return 0;
81 }
82 
StereoToMono(AudioFrame * frame)83 int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
84   if (frame->num_channels_ != 2) {
85     return -1;
86   }
87   DownmixChannels(1, frame);
88   return frame->num_channels_ == 1 ? 0 : -1;
89 }
90 
QuadToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)91 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
92                                         size_t samples_per_channel,
93                                         int16_t* dst_audio) {
94   for (size_t i = 0; i < samples_per_channel; i++) {
95     dst_audio[i * 2] =
96         (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1;
97     dst_audio[i * 2 + 1] =
98         (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >>
99         1;
100   }
101 }
102 
QuadToStereo(AudioFrame * frame)103 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
104   if (frame->num_channels_ != 4) {
105     return -1;
106   }
107 
108   RTC_DCHECK_LE(frame->samples_per_channel_ * 4,
109                 AudioFrame::kMaxDataSizeSamples);
110 
111   if (!frame->muted()) {
112     QuadToStereo(frame->data(), frame->samples_per_channel_,
113                  frame->mutable_data());
114   }
115   frame->num_channels_ = 2;
116 
117   return 0;
118 }
119 
DownmixChannels(const int16_t * src_audio,size_t src_channels,size_t samples_per_channel,size_t dst_channels,int16_t * dst_audio)120 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
121                                            size_t src_channels,
122                                            size_t samples_per_channel,
123                                            size_t dst_channels,
124                                            int16_t* dst_audio) {
125   if (src_channels > 1 && dst_channels == 1) {
126     DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels,
127                              dst_audio);
128     return;
129   } else if (src_channels == 4 && dst_channels == 2) {
130     QuadToStereo(src_audio, samples_per_channel, dst_audio);
131     return;
132   }
133 
134   RTC_NOTREACHED() << "src_channels: " << src_channels
135                    << ", dst_channels: " << dst_channels;
136 }
137 
DownmixChannels(size_t dst_channels,AudioFrame * frame)138 void AudioFrameOperations::DownmixChannels(size_t dst_channels,
139                                            AudioFrame* frame) {
140   RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_,
141                 AudioFrame::kMaxDataSizeSamples);
142   if (frame->num_channels_ > 1 && dst_channels == 1) {
143     if (!frame->muted()) {
144       DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_,
145                                frame->num_channels_, frame->mutable_data());
146     }
147     frame->num_channels_ = 1;
148   } else if (frame->num_channels_ == 4 && dst_channels == 2) {
149     int err = QuadToStereo(frame);
150     RTC_DCHECK_EQ(err, 0);
151   } else {
152     RTC_NOTREACHED() << "src_channels: " << frame->num_channels_
153                      << ", dst_channels: " << dst_channels;
154   }
155 }
156 
UpmixChannels(size_t target_number_of_channels,AudioFrame * frame)157 void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels,
158                                          AudioFrame* frame) {
159   RTC_DCHECK_EQ(frame->num_channels_, 1);
160   RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels,
161                 AudioFrame::kMaxDataSizeSamples);
162 
163   if (frame->num_channels_ != 1 ||
164       frame->samples_per_channel_ * target_number_of_channels >
165           AudioFrame::kMaxDataSizeSamples) {
166     return;
167   }
168 
169   if (!frame->muted()) {
170     // Up-mixing done in place. Going backwards through the frame ensure nothing
171     // is irrevocably overwritten.
172     for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) {
173       for (size_t j = 0; j < target_number_of_channels; ++j) {
174         frame->mutable_data()[target_number_of_channels * i + j] =
175             frame->data()[i];
176       }
177     }
178   }
179   frame->num_channels_ = target_number_of_channels;
180 }
181 
SwapStereoChannels(AudioFrame * frame)182 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
183   RTC_DCHECK(frame);
184   if (frame->num_channels_ != 2 || frame->muted()) {
185     return;
186   }
187 
188   int16_t* frame_data = frame->mutable_data();
189   for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
190     std::swap(frame_data[i], frame_data[i + 1]);
191   }
192 }
193 
Mute(AudioFrame * frame,bool previous_frame_muted,bool current_frame_muted)194 void AudioFrameOperations::Mute(AudioFrame* frame,
195                                 bool previous_frame_muted,
196                                 bool current_frame_muted) {
197   RTC_DCHECK(frame);
198   if (!previous_frame_muted && !current_frame_muted) {
199     // Not muted, don't touch.
200   } else if (previous_frame_muted && current_frame_muted) {
201     // Frame fully muted.
202     size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
203     RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
204     frame->Mute();
205   } else {
206     // Fade is a no-op on a muted frame.
207     if (frame->muted()) {
208       return;
209     }
210 
211     // Limit number of samples to fade, if frame isn't long enough.
212     size_t count = kMuteFadeFrames;
213     float inc = kMuteFadeInc;
214     if (frame->samples_per_channel_ < kMuteFadeFrames) {
215       count = frame->samples_per_channel_;
216       if (count > 0) {
217         inc = 1.0f / count;
218       }
219     }
220 
221     size_t start = 0;
222     size_t end = count;
223     float start_g = 0.0f;
224     if (current_frame_muted) {
225       // Fade out the last |count| samples of frame.
226       RTC_DCHECK(!previous_frame_muted);
227       start = frame->samples_per_channel_ - count;
228       end = frame->samples_per_channel_;
229       start_g = 1.0f;
230       inc = -inc;
231     } else {
232       // Fade in the first |count| samples of frame.
233       RTC_DCHECK(previous_frame_muted);
234     }
235 
236     // Perform fade.
237     int16_t* frame_data = frame->mutable_data();
238     size_t channels = frame->num_channels_;
239     for (size_t j = 0; j < channels; ++j) {
240       float g = start_g;
241       for (size_t i = start * channels; i < end * channels; i += channels) {
242         g += inc;
243         frame_data[i + j] *= g;
244       }
245     }
246   }
247 }
248 
Mute(AudioFrame * frame)249 void AudioFrameOperations::Mute(AudioFrame* frame) {
250   Mute(frame, true, true);
251 }
252 
ApplyHalfGain(AudioFrame * frame)253 void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
254   RTC_DCHECK(frame);
255   RTC_DCHECK_GT(frame->num_channels_, 0);
256   if (frame->num_channels_ < 1 || frame->muted()) {
257     return;
258   }
259 
260   int16_t* frame_data = frame->mutable_data();
261   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
262        i++) {
263     frame_data[i] = frame_data[i] >> 1;
264   }
265 }
266 
Scale(float left,float right,AudioFrame * frame)267 int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
268   if (frame->num_channels_ != 2) {
269     return -1;
270   } else if (frame->muted()) {
271     return 0;
272   }
273 
274   int16_t* frame_data = frame->mutable_data();
275   for (size_t i = 0; i < frame->samples_per_channel_; i++) {
276     frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
277     frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
278   }
279   return 0;
280 }
281 
ScaleWithSat(float scale,AudioFrame * frame)282 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
283   if (frame->muted()) {
284     return 0;
285   }
286 
287   int16_t* frame_data = frame->mutable_data();
288   for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
289        i++) {
290     frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
291   }
292   return 0;
293 }
294 }  // namespace webrtc
295