1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_coding/neteq/preemptive_expand.h"
12 
13 #include <algorithm>
14 
15 #include "api/array_view.h"
16 #include "modules/audio_coding/neteq/audio_multi_vector.h"
17 #include "modules/audio_coding/neteq/time_stretch.h"
18 
19 namespace webrtc {
20 
Process(const int16_t * input,size_t input_length,size_t old_data_length,AudioMultiVector * output,size_t * length_change_samples)21 PreemptiveExpand::ReturnCodes PreemptiveExpand::Process(
22     const int16_t* input,
23     size_t input_length,
24     size_t old_data_length,
25     AudioMultiVector* output,
26     size_t* length_change_samples) {
27   old_data_length_per_channel_ = old_data_length;
28   // Input length must be (almost) 30 ms.
29   // Also, the new part must be at least |overlap_samples_| elements.
30   static const size_t k15ms = 120;  // 15 ms = 120 samples at 8 kHz sample rate.
31   if (num_channels_ == 0 ||
32       input_length / num_channels_ < (2 * k15ms - 1) * fs_mult_ ||
33       old_data_length >= input_length / num_channels_ - overlap_samples_) {
34     // Length of input data too short to do preemptive expand. Simply move all
35     // data from input to output.
36     output->PushBackInterleaved(
37         rtc::ArrayView<const int16_t>(input, input_length));
38     return kError;
39   }
40   const bool kFastMode = false;  // Fast mode is not available for PE Expand.
41   return TimeStretch::Process(input, input_length, kFastMode, output,
42                               length_change_samples);
43 }
44 
SetParametersForPassiveSpeech(size_t len,int16_t * best_correlation,size_t * peak_index) const45 void PreemptiveExpand::SetParametersForPassiveSpeech(size_t len,
46                                                      int16_t* best_correlation,
47                                                      size_t* peak_index) const {
48   // When the signal does not contain any active speech, the correlation does
49   // not matter. Simply set it to zero.
50   *best_correlation = 0;
51 
52   // For low energy expansion, the new data can be less than 15 ms,
53   // but we must ensure that best_correlation is not larger than the length of
54   // the new data.
55   // but we must ensure that best_correlation is not larger than the new data.
56   *peak_index = std::min(*peak_index, len - old_data_length_per_channel_);
57 }
58 
CheckCriteriaAndStretch(const int16_t * input,size_t input_length,size_t peak_index,int16_t best_correlation,bool active_speech,bool,AudioMultiVector * output) const59 PreemptiveExpand::ReturnCodes PreemptiveExpand::CheckCriteriaAndStretch(
60     const int16_t* input,
61     size_t input_length,
62     size_t peak_index,
63     int16_t best_correlation,
64     bool active_speech,
65     bool /*fast_mode*/,
66     AudioMultiVector* output) const {
67   // Pre-calculate common multiplication with |fs_mult_|.
68   // 120 corresponds to 15 ms.
69   size_t fs_mult_120 = static_cast<size_t>(fs_mult_ * 120);
70   // Check for strong correlation (>0.9 in Q14) and at least 15 ms new data,
71   // or passive speech.
72   if (((best_correlation > kCorrelationThreshold) &&
73        (old_data_length_per_channel_ <= fs_mult_120)) ||
74       !active_speech) {
75     // Do accelerate operation by overlap add.
76 
77     // Set length of the first part, not to be modified.
78     size_t unmodified_length =
79         std::max(old_data_length_per_channel_, fs_mult_120);
80     // Copy first part, including cross-fade region.
81     output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
82         input, (unmodified_length + peak_index) * num_channels_));
83     // Copy the last |peak_index| samples up to 15 ms to |temp_vector|.
84     AudioMultiVector temp_vector(num_channels_);
85     temp_vector.PushBackInterleaved(rtc::ArrayView<const int16_t>(
86         &input[(unmodified_length - peak_index) * num_channels_],
87         peak_index * num_channels_));
88     // Cross-fade |temp_vector| onto the end of |output|.
89     output->CrossFade(temp_vector, peak_index);
90     // Copy the last unmodified part, 15 ms + pitch period until the end.
91     output->PushBackInterleaved(rtc::ArrayView<const int16_t>(
92         &input[unmodified_length * num_channels_],
93         input_length - unmodified_length * num_channels_));
94 
95     if (active_speech) {
96       return kSuccess;
97     } else {
98       return kSuccessLowEnergy;
99     }
100   } else {
101     // Accelerate not allowed. Simply move all data from decoded to outData.
102     output->PushBackInterleaved(
103         rtc::ArrayView<const int16_t>(input, input_length));
104     return kNoStretch;
105   }
106 }
107 
Create(int sample_rate_hz,size_t num_channels,const BackgroundNoise & background_noise,size_t overlap_samples) const108 PreemptiveExpand* PreemptiveExpandFactory::Create(
109     int sample_rate_hz,
110     size_t num_channels,
111     const BackgroundNoise& background_noise,
112     size_t overlap_samples) const {
113   return new PreemptiveExpand(sample_rate_hz, num_channels, background_noise,
114                               overlap_samples);
115 }
116 
117 }  // namespace webrtc
118