/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "audio/utility/audio_frame_operations.h"

#include <string.h>

#include <algorithm>
#include <cstdint>
#include <utility>

#include "common_audio/include/audio_util.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_conversions.h"

namespace webrtc {
namespace {

// 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
const size_t kMuteFadeFrames = 128;
const float kMuteFadeInc = 1.0f / kMuteFadeFrames;

}  // namespace

void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
                               AudioFrame* result_frame) {
  // Sanity check.
  RTC_DCHECK(result_frame);
  RTC_DCHECK_GT(result_frame->num_channels_, 0);
  RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);

  bool no_previous_data = result_frame->muted();
  if (result_frame->samples_per_channel_ !=
      frame_to_add.samples_per_channel_) {
    // Special case: we have no data to start with.
    RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
    result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
    no_previous_data = true;
  }

  if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
      frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
    result_frame->vad_activity_ = AudioFrame::kVadActive;
  } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
             frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
    result_frame->vad_activity_ = AudioFrame::kVadUnknown;
  }

  if (result_frame->speech_type_ != frame_to_add.speech_type_)
    result_frame->speech_type_ = AudioFrame::kUndefined;

  if (!frame_to_add.muted()) {
    const int16_t* in_data = frame_to_add.data();
    int16_t* out_data = result_frame->mutable_data();
    size_t length =
        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
    if (no_previous_data) {
      std::copy(in_data, in_data + length, out_data);
    } else {
      for (size_t i = 0; i < length; i++) {
        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
                                   static_cast<int32_t>(in_data[i]);
        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
      }
    }
  }
}

int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
  if (frame->num_channels_ != 1) {
    return -1;
  }
  UpmixChannels(2, frame);
  return 0;
}

int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
  if (frame->num_channels_ != 2) {
    return -1;
  }
  DownmixChannels(1, frame);
0 : -1; 89 } 90 QuadToStereo(const int16_t * src_audio,size_t samples_per_channel,int16_t * dst_audio)91 void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, 92 size_t samples_per_channel, 93 int16_t* dst_audio) { 94 for (size_t i = 0; i < samples_per_channel; i++) { 95 dst_audio[i * 2] = 96 (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; 97 dst_audio[i * 2 + 1] = 98 (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> 99 1; 100 } 101 } 102 QuadToStereo(AudioFrame * frame)103 int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { 104 if (frame->num_channels_ != 4) { 105 return -1; 106 } 107 108 RTC_DCHECK_LE(frame->samples_per_channel_ * 4, 109 AudioFrame::kMaxDataSizeSamples); 110 111 if (!frame->muted()) { 112 QuadToStereo(frame->data(), frame->samples_per_channel_, 113 frame->mutable_data()); 114 } 115 frame->num_channels_ = 2; 116 117 return 0; 118 } 119 DownmixChannels(const int16_t * src_audio,size_t src_channels,size_t samples_per_channel,size_t dst_channels,int16_t * dst_audio)120 void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, 121 size_t src_channels, 122 size_t samples_per_channel, 123 size_t dst_channels, 124 int16_t* dst_audio) { 125 if (src_channels > 1 && dst_channels == 1) { 126 DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels, 127 dst_audio); 128 return; 129 } else if (src_channels == 4 && dst_channels == 2) { 130 QuadToStereo(src_audio, samples_per_channel, dst_audio); 131 return; 132 } 133 134 RTC_NOTREACHED() << "src_channels: " << src_channels 135 << ", dst_channels: " << dst_channels; 136 } 137 DownmixChannels(size_t dst_channels,AudioFrame * frame)138 void AudioFrameOperations::DownmixChannels(size_t dst_channels, 139 AudioFrame* frame) { 140 RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_, 141 AudioFrame::kMaxDataSizeSamples); 142 if (frame->num_channels_ > 1 && dst_channels == 1) { 143 if (!frame->muted()) { 144 DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_, 145 frame->num_channels_, frame->mutable_data()); 146 } 147 frame->num_channels_ = 1; 148 } else if (frame->num_channels_ == 4 && dst_channels == 2) { 149 int err = QuadToStereo(frame); 150 RTC_DCHECK_EQ(err, 0); 151 } else { 152 RTC_NOTREACHED() << "src_channels: " << frame->num_channels_ 153 << ", dst_channels: " << dst_channels; 154 } 155 } 156 UpmixChannels(size_t target_number_of_channels,AudioFrame * frame)157 void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels, 158 AudioFrame* frame) { 159 RTC_DCHECK_EQ(frame->num_channels_, 1); 160 RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels, 161 AudioFrame::kMaxDataSizeSamples); 162 163 if (frame->num_channels_ != 1 || 164 frame->samples_per_channel_ * target_number_of_channels > 165 AudioFrame::kMaxDataSizeSamples) { 166 return; 167 } 168 169 if (!frame->muted()) { 170 // Up-mixing done in place. Going backwards through the frame ensure nothing 171 // is irrevocably overwritten. 
    for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) {
      for (size_t j = 0; j < target_number_of_channels; ++j) {
        frame->mutable_data()[target_number_of_channels * i + j] =
            frame->data()[i];
      }
    }
  }
  frame->num_channels_ = target_number_of_channels;
}

void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
  RTC_DCHECK(frame);
  if (frame->num_channels_ != 2 || frame->muted()) {
    return;
  }

  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) {
    std::swap(frame_data[i], frame_data[i + 1]);
  }
}

void AudioFrameOperations::Mute(AudioFrame* frame,
                                bool previous_frame_muted,
                                bool current_frame_muted) {
  RTC_DCHECK(frame);
  if (!previous_frame_muted && !current_frame_muted) {
    // Not muted, don't touch.
  } else if (previous_frame_muted && current_frame_muted) {
    // Frame fully muted.
    size_t total_samples = frame->samples_per_channel_ * frame->num_channels_;
    RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples);
    frame->Mute();
  } else {
    // Fade is a no-op on a muted frame.
    if (frame->muted()) {
      return;
    }

    // Limit number of samples to fade, if frame isn't long enough.
    size_t count = kMuteFadeFrames;
    float inc = kMuteFadeInc;
    if (frame->samples_per_channel_ < kMuteFadeFrames) {
      count = frame->samples_per_channel_;
      if (count > 0) {
        inc = 1.0f / count;
      }
    }

    size_t start = 0;
    size_t end = count;
    float start_g = 0.0f;
    if (current_frame_muted) {
      // Fade out the last |count| samples of frame.
      RTC_DCHECK(!previous_frame_muted);
      start = frame->samples_per_channel_ - count;
      end = frame->samples_per_channel_;
      start_g = 1.0f;
      inc = -inc;
    } else {
      // Fade in the first |count| samples of frame.
      RTC_DCHECK(previous_frame_muted);
    }

    // Perform fade.
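    // The fade is a per-channel linear gain ramp: g advances by |inc| once
    // per sample, running from start_g towards start_g + count * inc
    // (0.0 -> 1.0 for a fade-in, 1.0 -> 0.0 for a fade-out).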
    int16_t* frame_data = frame->mutable_data();
    size_t channels = frame->num_channels_;
    for (size_t j = 0; j < channels; ++j) {
      float g = start_g;
      for (size_t i = start * channels; i < end * channels; i += channels) {
        g += inc;
        frame_data[i + j] *= g;
      }
    }
  }
}

void AudioFrameOperations::Mute(AudioFrame* frame) {
  Mute(frame, true, true);
}

void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
  RTC_DCHECK(frame);
  RTC_DCHECK_GT(frame->num_channels_, 0);
  if (frame->num_channels_ < 1 || frame->muted()) {
    return;
  }

  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       i++) {
    frame_data[i] = frame_data[i] >> 1;
  }
}

int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
  if (frame->num_channels_ != 2) {
    return -1;
  } else if (frame->muted()) {
    return 0;
  }

  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
    frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
    frame_data[2 * i + 1] =
        static_cast<int16_t>(right * frame_data[2 * i + 1]);
  }
  return 0;
}

int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
  if (frame->muted()) {
    return 0;
  }

  int16_t* frame_data = frame->mutable_data();
  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
       i++) {
    frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]);
  }
  return 0;
}

}  // namespace webrtc
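// A minimal usage sketch (assuming an AudioFrame |frame| that has already
// been filled with unmuted, interleaved PCM samples): fading the signal out
// at the frame where it transitions from unmuted to muted.
//
//   webrtc::AudioFrameOperations::Mute(&frame,
//                                      /*previous_frame_muted=*/false,
//                                      /*current_frame_muted=*/true);
//
// This ramps the last kMuteFadeFrames samples of each channel down to
// silence; subsequent fully muted frames can then be cleared with
// webrtc::AudioFrameOperations::Mute(&frame).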