1 /*
2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 
/*
 * This header file contains the descriptions and declarations of all
 * functions in the fixed-point signal processing library (SPL).
 * For specific function calls, see bottom of file.
 */
17 
18 #ifndef WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
19 #define WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
20 
21 #include <string.h>
22 #include "webrtc/typedefs.h"
23 
// Macros specific for the fixed point implementation.
// Every expansion is fully parenthesized so that it can be dropped into a
// larger expression without operator-precedence surprises.
#define WEBRTC_SPL_WORD16_MAX       32767
#define WEBRTC_SPL_WORD16_MIN       (-32768)
#define WEBRTC_SPL_WORD32_MAX       ((int32_t)0x7fffffff)
// Written as (-MAX - 1) so that no out-of-range constant has to be converted
// to a signed type.
#define WEBRTC_SPL_WORD32_MIN       ((int32_t)(-0x7fffffff - 1))
#define WEBRTC_SPL_MAX_LPC_ORDER    14
// Smaller / larger of the two arguments.
// NOTE: each argument is expanded twice, so do not pass expressions with side
// effects (e.g. i++).
#define WEBRTC_SPL_MIN(A, B)        (((A) < (B)) ? (A) : (B))  // Get min value
#define WEBRTC_SPL_MAX(A, B)        (((A) > (B)) ? (A) : (B))  // Get max value
// Absolute value of |a| after conversion to int16_t / int32_t.
// The argument is parenthesized before the cast so that a compound expression
// such as (x - y) is converted as a whole rather than only its first operand.
// NOTE: |a| is expanded three times; avoid arguments with side effects.
// TODO(kma/bjorn): For the next two macros, investigate how to correct the code
// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN.
#define WEBRTC_SPL_ABS_W16(a) \
    (((int16_t)(a) >= 0) ? ((int16_t)(a)) : -((int16_t)(a)))
#define WEBRTC_SPL_ABS_W32(a) \
    (((int32_t)(a) >= 0) ? ((int32_t)(a)) : -((int32_t)(a)))
38 
// Plain products. Each operand is converted to the type named in the macro
// before multiplying; the multiplication itself is not overflow-checked.
#define WEBRTC_SPL_MUL(a, b) \
    ((int32_t) ((int32_t)(a) * (int32_t)(b)))
#define WEBRTC_SPL_UMUL(a, b) \
    ((uint32_t) ((uint32_t)(a) * (uint32_t)(b)))
#define WEBRTC_SPL_UMUL_32_16(a, b) \
    ((uint32_t) ((uint32_t)(a) * (uint16_t)(b)))
#define WEBRTC_SPL_MUL_16_U16(a, b) \
    ((int32_t)(int16_t)(a) * (uint16_t)(b))

#ifndef WEBRTC_ARCH_ARM_V7
// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h
#ifndef MIPS32_LE
// For MIPS platforms, these are inline functions in spl_inl_mips.h
#define WEBRTC_SPL_MUL_16_16(a, b) \
    ((int32_t) (((int16_t)(a)) * ((int16_t)(b))))
// 16-bit |a| times 32-bit |b|, shifted right by 16. |b| is split into its
// upper and lower 16-bit halves; |b| is parenthesized at every use so that an
// argument such as (x | y) is shifted/masked as a whole.
#define WEBRTC_SPL_MUL_16_32_RSFT16(a, b) \
    (WEBRTC_SPL_MUL_16_16(a, (b) >> 16) \
     + ((WEBRTC_SPL_MUL_16_16(a, ((b) & 0xffff) >> 1) + 0x4000) >> 15))
#endif
#endif

// 16-bit |a| times 32-bit |b|, shifted right by 11 / 14 / 15 with rounding of
// the low-half contribution. The high-half product is scaled with a
// multiplication by a power of two instead of a left shift, because left
// shifting a negative value is undefined behaviour in C.
#define WEBRTC_SPL_MUL_16_32_RSFT11(a, b) \
    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 5)) \
    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x0200) >> 10))
#define WEBRTC_SPL_MUL_16_32_RSFT14(a, b) \
    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 2)) \
    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x1000) >> 13))
#define WEBRTC_SPL_MUL_16_32_RSFT15(a, b) \
    ((WEBRTC_SPL_MUL_16_16(a, (b) >> 16) * (1 << 1)) \
    + (((WEBRTC_SPL_MUL_16_U16(a, (uint16_t)(b)) >> 1) + 0x2000) >> 14))

// 16x16-bit product shifted right by |c| bits.
#define WEBRTC_SPL_MUL_16_16_RSFT(a, b, c) \
    (WEBRTC_SPL_MUL_16_16(a, b) >> (c))

// As above, but adds half of the divisor (1 << (c - 1)) before shifting.
// |c| must be at least 1, since the expansion shifts by (c) - 1.
#define WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(a, b, c) \
    ((WEBRTC_SPL_MUL_16_16(a, b) + ((int32_t) \
                                  (((int32_t)1) << ((c) - 1)))) >> (c))
76 
// C + ((A * B) >> 16), assembled from the upper and lower 16-bit halves of B
// so that no 64-bit intermediate is needed. All arguments are parenthesized so
// compound expressions are handled as a unit.
#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \
    ((C) + ((B) >> 16) * (A) + (((uint32_t)(0x0000FFFF & (B)) * (A)) >> 16))

// Saturates |b| to the range [c, a]: yields |a| when b > a, |c| when b < c,
// and |b| otherwise. Arguments are expanded more than once; avoid side effects.
#define WEBRTC_SPL_SAT(a, b, c) \
    (((b) > (a)) ? (a) : (((b) < (c)) ? (c) : (b)))
82 
// Bidirectional shift of |x|: a negative |c| shifts right by -|c| bits, any
// other |c| shifts left by |c| bits.
#define WEBRTC_SPL_SHIFT_W32(x, c) \
    (((c) < 0) ? ((x) >> (-(c))) : ((x) << (c)))

// Left shift only; a negative |c| is not allowed.
// |x| is deliberately left uncast because of signed/unsigned issues.
#define WEBRTC_SPL_LSHIFT_W32(x, c)     ((x) << (c))

// Right shift performed on |x| after conversion to uint32_t.
#define WEBRTC_SPL_RSHIFT_U32(x, c)     ((uint32_t)(x) >> (c))
93 
// Maps |a| (converted to int16_t) to a value in [0, 0x7fff]:
// ((a * 18816) >> 7) & 0x7fff. The argument is parenthesized so that a
// compound expression is converted and multiplied as a whole.
#define WEBRTC_SPL_RAND(a) \
    ((int16_t)((((int16_t)(a) * 18816) >> 7) & 0x00007fff))
96 
97 #ifdef __cplusplus
98 extern "C" {
99 #endif
100 
// Copies |length| int16_t elements from |v2| to |v1| using memcpy(); the
// element count is converted to a byte count here.
#define WEBRTC_SPL_MEMCPY_W16(v1, v2, length) \
  memcpy((v1), (v2), sizeof(int16_t) * (length))
103 
104 // inline functions:
105 #include "webrtc/common_audio/signal_processing/include/spl_inl.h"
106 
// Initialize SPL. Currently it contains only function pointer initialization.
// If the underlying platform is known to be ARM-Neon (WEBRTC_HAS_NEON defined),
// the pointers will be assigned to code optimized for Neon; otherwise
// if run-time Neon detection (WEBRTC_DETECT_NEON) is enabled, the pointers
// will be assigned to either Neon code or generic C code; otherwise, generic C
// code will be assigned.
// Note that this function MUST be called in any application that uses SPL
// functions.
//
// Declared with an explicit (void) parameter list: in C an empty list "()"
// leaves the parameters unspecified, so calls with stray arguments would not
// be diagnosed.
void WebRtcSpl_Init(void);
116 
// NOTE(review): no description is given next to this declaration; judging by
// the name it presumably reports a scaling value for squaring |in_vector|
// |times| times - confirm against the implementation.
int16_t WebRtcSpl_GetScalingSquare(
    int16_t* in_vector, size_t in_vector_length, size_t times);
120 
// Copy and set operations (implemented in copy_set_operations.c).
// The individual functions are described at the bottom of this file.
void WebRtcSpl_MemSetW16(
    int16_t* vector, int16_t set_value, size_t vector_length);
void WebRtcSpl_MemSetW32(
    int32_t* vector, int32_t set_value, size_t vector_length);
void WebRtcSpl_MemCpyReversedOrder(
    int16_t* out_vector, int16_t* in_vector, size_t vector_length);
void WebRtcSpl_CopyFromEndW16(
    const int16_t* in_vector,
    size_t in_vector_length,
    size_t samples,
    int16_t* out_vector);
void WebRtcSpl_ZerosArrayW16(int16_t* vector, size_t vector_length);
void WebRtcSpl_ZerosArrayW32(int32_t* vector, size_t vector_length);
// End: Copy and set operations.
141 
142 
// Minimum and maximum operations: functions plus the function pointers
// through which they are reached (implemented in min_max_operations.c).

// Finds the largest absolute sample value in a signed 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Largest absolute value found in |vector|.
typedef int16_t (*MaxAbsValueW16)(const int16_t* vector, size_t length);
int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length);
#endif
#ifdef MIPS32_LE
int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length);
#endif
extern MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16;
162 
// Finds the largest absolute sample value in a signed 32-bit vector.
//
// Input:
//      - vector : Input samples (32-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Largest absolute value found in |vector|.
typedef int32_t (*MaxAbsValueW32)(const int32_t* vector, size_t length);
int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length);
#endif
#ifdef MIPS_DSP_R1_LE
int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length);
#endif
extern MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32;
179 
// Finds the largest sample value in a 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Largest sample value found in |vector|.
typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length);
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length);
#endif
#ifdef MIPS32_LE
int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length);
#endif
extern MaxValueW16 WebRtcSpl_MaxValueW16;
196 
// Finds the largest sample value in a 32-bit vector.
//
// Input:
//      - vector : Input samples (32-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Largest sample value found in |vector|.
typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length);
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length);
#endif
#ifdef MIPS32_LE
int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length);
#endif
extern MaxValueW32 WebRtcSpl_MaxValueW32;
213 
// Finds the smallest sample value in a 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Smallest sample value found in |vector|.
typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length);
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length);
#endif
#ifdef MIPS32_LE
int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length);
#endif
extern MinValueW16 WebRtcSpl_MinValueW16;
230 
// Finds the smallest sample value in a 32-bit vector.
//
// Input:
//      - vector : Input samples (32-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Smallest sample value found in |vector|.
typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length);
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length);
#endif
#ifdef MIPS32_LE
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
#endif
extern MinValueW32 WebRtcSpl_MinValueW32;
247 
// Locates the sample with the largest absolute value in a 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Index of the largest absolute value in |vector|. When the
//                 maximum occurs more than once, the first occurrence wins.
//                 -32768 always takes precedence over 32767 (even though
//                 -32768 has an int16 absolute value of 32767).
size_t WebRtcSpl_MaxAbsIndexW16(
    const int16_t* vector, size_t length);
259 
// Locates the largest sample value in a 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Index of the maximum value in |vector|; the first index is
//                 returned when several samples share the maximum.
size_t WebRtcSpl_MaxIndexW16(
    const int16_t* vector, size_t length);
269 
// Locates the largest sample value in a 32-bit vector.
//
// Input:
//      - vector : Input samples (32-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Index of the maximum value in |vector|; the first index is
//                 returned when several samples share the maximum.
size_t WebRtcSpl_MaxIndexW32(
    const int32_t* vector, size_t length);
279 
// Locates the smallest sample value in a 16-bit vector.
//
// Input:
//      - vector : Input samples (16-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Index of the minimum value in |vector|; the first index is
//                 returned when several samples share the minimum.
size_t WebRtcSpl_MinIndexW16(
    const int16_t* vector, size_t length);
289 
// Locates the smallest sample value in a 32-bit vector.
//
// Input:
//      - vector : Input samples (32-bit).
//      - length : Sample count of |vector|.
//
// Return value  : Index of the minimum value in |vector|; the first index is
//                 returned when several samples share the minimum.
size_t WebRtcSpl_MinIndexW32(
    const int32_t* vector, size_t length);

// End: Minimum and maximum operations.
301 
302 
// Vector scaling operations (implemented in vector_scaling_operations.c).
// The individual functions are described at the bottom of this file.
void WebRtcSpl_VectorBitShiftW16(
    int16_t* out_vector,
    size_t vector_length,
    const int16_t* in_vector,
    int16_t right_shifts);
void WebRtcSpl_VectorBitShiftW32(
    int32_t* out_vector,
    size_t vector_length,
    const int32_t* in_vector,
    int16_t right_shifts);
void WebRtcSpl_VectorBitShiftW32ToW16(
    int16_t* out_vector,
    size_t vector_length,
    const int32_t* in_vector,
    int right_shifts);
void WebRtcSpl_ScaleVector(
    const int16_t* in_vector,
    int16_t* out_vector,
    int16_t gain,
    size_t vector_length,
    int16_t right_shifts);
void WebRtcSpl_ScaleVectorWithSat(
    const int16_t* in_vector,
    int16_t* out_vector,
    int16_t gain,
    size_t vector_length,
    int16_t right_shifts);
void WebRtcSpl_ScaleAndAddVectors(
    const int16_t* in_vector1,
    int16_t gain1,
    int right_shifts1,
    const int16_t* in_vector2,
    int16_t gain2,
    int right_shifts2,
    int16_t* out_vector,
    size_t vector_length);
333 
// These functions (and the pointer used to reach them) compute, per element:
//   out_vector[k] = ((in_vector1_scale * in_vector1[k])
//        + (in_vector2_scale * in_vector2[k]) + round_value) >> right_shifts,
//   with round_value = (1 << right_shifts) >> 1.
//
// Input:
//      - in_vector1       : First input vector
//      - in_vector1_scale : Gain applied to the first vector
//      - in_vector2       : Second input vector
//      - in_vector2_scale : Gain applied to the second vector
//      - right_shifts     : Number of right bit shifts to apply
//      - length           : Element count of the input vectors
//
// Output:
//      - out_vector       : Result vector
// Return value            : 0 if OK, -1 if (in_vector1 == NULL
//                           || in_vector2 == NULL || out_vector == NULL
//                           || length == 0 || right_shifts < 0).
typedef int (*ScaleAndAddVectorsWithRound)(
    const int16_t* in_vector1,
    int16_t in_vector1_scale,
    const int16_t* in_vector2,
    int16_t in_vector2_scale,
    int right_shifts,
    int16_t* out_vector,
    size_t length);
int WebRtcSpl_ScaleAndAddVectorsWithRoundC(
    const int16_t* in_vector1,
    int16_t in_vector1_scale,
    const int16_t* in_vector2,
    int16_t in_vector2_scale,
    int right_shifts,
    int16_t* out_vector,
    size_t length);
#ifdef MIPS_DSP_R1_LE
int WebRtcSpl_ScaleAndAddVectorsWithRound_mips(
    const int16_t* in_vector1,
    int16_t in_vector1_scale,
    const int16_t* in_vector2,
    int16_t in_vector2_scale,
    int right_shifts,
    int16_t* out_vector,
    size_t length);
#endif
extern ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
// End: Vector scaling operations.
377 
// iLBC specific functions (implemented in ilbc_specific_functions.c).
// The individual functions are described at the bottom of this file.
void WebRtcSpl_ReverseOrderMultArrayElements(
    int16_t* out_vector,
    const int16_t* in_vector,
    const int16_t* window,
    size_t vector_length,
    int16_t right_shifts);
void WebRtcSpl_ElementwiseVectorMult(
    int16_t* out_vector,
    const int16_t* in_vector,
    const int16_t* window,
    size_t vector_length,
    int16_t right_shifts);
void WebRtcSpl_AddVectorsAndShift(
    int16_t* out_vector,
    const int16_t* in_vector1,
    const int16_t* in_vector2,
    size_t vector_length,
    int16_t right_shifts);
void WebRtcSpl_AddAffineVectorToVector(
    int16_t* out_vector,
    int16_t* in_vector,
    int16_t gain,
    int32_t add_constant,
    int16_t right_shifts,
    size_t vector_length);
void WebRtcSpl_AffineTransformVector(
    int16_t* out_vector,
    int16_t* in_vector,
    int16_t gain,
    int32_t add_constant,
    int16_t right_shifts,
    size_t vector_length);
// End: iLBC specific functions.
408 
// Signal processing operations.

// Auto-correlation computed in 32-bit fixed point.
//
// Input:
//      - in_vector        : Samples to compute the auto-correlation of
//      - in_vector_length : Number of samples in |in_vector|
//      - order            : Highest lag for which the auto-correlation is
//                           computed
//
// Output:
//      - result           : Auto-correlation values. Interpret them relative
//                           to each other: the absolute values may have been
//                           shifted down to avoid overflow.
//
//      - scale            : Number of left shifts needed to bring the
//                           auto-correlation to Q0
//
// Return value            : Number of samples in |result|, i.e. (order+1)
size_t WebRtcSpl_AutoCorrelation(
    const int16_t* in_vector,
    size_t in_vector_length,
    size_t order,
    int32_t* result,
    int* scale);
433 
// Levinson-Durbin recursion in 32-bit fixed point, implemented WITHOUT the
// 64 bit class.
//
// Input:
//      - auto_corr : Auto-correlation values; must hold at least |order|+1
//                    elements
//      - order     : LPC filter order (orders up to 20 are supported)
//
// Output:
//      - lpc_coef  : lpc_coef[0..order], LPC coefficients in Q12
//      - refl_coef : refl_coef[0..order-1], reflection coefficients in Q15
//
// Return value     : 1 if the filter is stable, 0 if it is unstable
int16_t WebRtcSpl_LevinsonDurbin(
    const int32_t* auto_corr,
    int16_t* lpc_coef,
    int16_t* refl_coef,
    size_t order);
450 
// 16-bit conversion of the reflection coefficients |refl_coef| into the LPC
// coefficients |lpc_coef|.
//
// NOTE: Even for stable reflection coefficients, the 16 bit conversion may in
// "rare" cases produce a "slightly unstable" filter, i.e. one with a pole just
// outside the unit circle.
//
// Input:
//      - refl_coef : Reflection coefficients in Q15 to convert
//      - use_order : Number of coefficients in |refl_coef|
//
// Output:
//      - lpc_coef  : LPC coefficients in Q12
void WebRtcSpl_ReflCoefToLpc(
    const int16_t* refl_coef, int use_order, int16_t* lpc_coef);
468 
// 16-bit conversion of the LPC coefficients |lpc_coef| into the reflection
// coefficients |refl_coef|, using the step-down algorithm.
//
// Input:
//      - lpc_coef  : LPC coefficients in Q12 to convert
//      - use_order : Number of coefficients in |lpc_coef|
//
// Output:
//      - refl_coef : Reflection coefficients in Q15.
void WebRtcSpl_LpcToReflCoef(
    int16_t* lpc_coef, int use_order, int16_t* refl_coef);
483 
// Derives 16-bit reflection coefficients from auto-correlation values.
//
// Input:
//      - auto_corr : Auto-correlation values
//      - use_order : Number of coefficients to calculate
//
// Output:
//      - refl_coef : Reflection coefficients in Q15.
void WebRtcSpl_AutoCorrToReflCoef(
    const int32_t* auto_corr, int use_order, int16_t* refl_coef);
495 
// These functions (and the pointer used to reach them) compute the
// cross-correlation between the sequences |seq1| and |seq2|.
// |seq1| stays fixed while the |seq2| pointer is advanced by |step_seq2| for
// every new correlation value. The arguments should satisfy:
// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
//      buffer size of |seq2|
//
// Input:
//      - seq1           : First sequence (fixed throughout the correlation)
//      - seq2           : Second sequence (moved by |step_seq2| for each new
//                            correlation)
//      - dim_seq        : Number of samples used in each cross-correlation
//      - dim_cross_correlation : Number of cross-correlations to compute (the
//                            start position in |seq2| is updated for each new
//                            one)
//      - right_shifts   : Number of right bit shifts to use. This determines
//                            the output Q-domain.
//      - step_seq2      : Number of (positive or negative) steps by which the
//                            |seq2| pointer is moved for each new
//                            cross-correlation value.
//
// Output:
//      - cross_correlation : The cross-correlation in Q(-right_shifts)
typedef void (*CrossCorrelation)(
    int32_t* cross_correlation,
    const int16_t* seq1,
    const int16_t* seq2,
    size_t dim_seq,
    size_t dim_cross_correlation,
    int right_shifts,
    int step_seq2);
void WebRtcSpl_CrossCorrelationC(
    int32_t* cross_correlation,
    const int16_t* seq1,
    const int16_t* seq2,
    size_t dim_seq,
    size_t dim_cross_correlation,
    int right_shifts,
    int step_seq2);
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcSpl_CrossCorrelationNeon(
    int32_t* cross_correlation,
    const int16_t* seq1,
    const int16_t* seq2,
    size_t dim_seq,
    size_t dim_cross_correlation,
    int right_shifts,
    int step_seq2);
#endif
#ifdef MIPS32_LE
void WebRtcSpl_CrossCorrelation_mips(
    int32_t* cross_correlation,
    const int16_t* seq1,
    const int16_t* seq2,
    size_t dim_seq,
    size_t dim_cross_correlation,
    int right_shifts,
    int step_seq2);
#endif
extern CrossCorrelation WebRtcSpl_CrossCorrelation;
552 
// Generates (the first half of) a Hanning window. |size| must lie in the
// range 1 to 512 inclusive.
//
// Input:
//      - size      : Requested Hanning window length (1 to 512)
//
// Output:
//      - window    : Hanning vector in Q14.
void WebRtcSpl_GetHanningWindow(
    int16_t* window, size_t size);
562 
// Computes y[k] = sqrt(1 - x[k]^2) for every element x[k] of |in_vector|.
// Both input and output values are in Q15.
//
// Inputs:
//      - in_vector     : Values x to evaluate sqrt(1 - x^2) for
//      - vector_length : Number of elements in |in_vector|
//
// Output:
//      - out_vector    : Results in Q15
void WebRtcSpl_SqrtOfOneMinusXSquared(
    int16_t* in_vector, size_t vector_length, int16_t* out_vector);
// End: Signal processing operations.
576 
// Randomization functions. The implementations live in
// randomization_functions.c; descriptions are at the bottom of this file.
int16_t WebRtcSpl_RandU(uint32_t* seed);
int16_t WebRtcSpl_RandN(uint32_t* seed);
int16_t WebRtcSpl_RandUArray(
    int16_t* vector, int16_t vector_length, uint32_t* seed);
// End: Randomization functions.
585 
// Math functions.
// NOTE(review): no description accompanies these declarations; judging by the
// names they are square-root routines on 32-bit values - confirm the exact
// rounding/scaling against the implementation.
int32_t WebRtcSpl_SqrtFloor(int32_t value);
int32_t WebRtcSpl_Sqrt(int32_t value);
589 
// Division routines. The implementations live in division_operations.c;
// descriptions are at the bottom of this file.
uint32_t WebRtcSpl_DivU32U16(uint32_t num, uint16_t den);
int32_t  WebRtcSpl_DivW32W16(int32_t num, int16_t den);
int16_t  WebRtcSpl_DivW32W16ResW16(int32_t num, int16_t den);
int32_t  WebRtcSpl_DivResultInQ31(int32_t num, int32_t den);
int32_t  WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low);
// End: Divisions.
598 
// NOTE(review): no description accompanies this declaration; judging by the
// names it presumably returns the energy of |vector| and reports the scaling
// that was applied through |scale_factor| - confirm against the implementation.
int32_t WebRtcSpl_Energy(
    int16_t* vector, size_t vector_length, int* scale_factor);
602 
// Computes the dot product of two int16_t vectors.
//
// Input:
//      - vector1 : First vector
//      - vector2 : Second vector
//      - length  : Number of samples that enter the dot product
//      - scaling : Number of right bit shifts applied to each term during the
//                  calculation to avoid overflow, i.e., the output will be in
//                  Q(-|scaling|)
//
// Return value   : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(
    const int16_t* vector1,
    const int16_t* vector2,
    size_t length,
    int scaling);
618 
// Filter operations.
// NOTE(review): no description accompanies WebRtcSpl_FilterAR here; the
// parameter names suggest an AR filter with separate high/low state and
// output vectors - confirm against the implementation.
size_t WebRtcSpl_FilterAR(
    const int16_t* ar_coef,
    size_t ar_coef_length,
    const int16_t* in_vector,
    size_t in_vector_length,
    int16_t* filter_state,
    size_t filter_state_length,
    int16_t* filter_state_low,
    size_t filter_state_low_length,
    int16_t* out_vector,
    int16_t* out_vector_low,
    size_t out_vector_low_length);
631 
// WebRtcSpl_FilterMAFastQ12(...)
//
// MA filtering of a vector, with coefficients in Q12.
//
// Input:
//      - in_vector         : Input samples; the filter state is read from
//                            in_vector[-order] .. in_vector[-1]
//      - ma_coef           : Filter coefficients (in Q12)
//      - ma_coef_length    : Number of B coefficients (order+1)
//      - vector_length     : Number of samples to filter
//
// Output:
//      - out_vector        : Filtered samples
//
void WebRtcSpl_FilterMAFastQ12(
    const int16_t* in_vector,
    int16_t* out_vector,
    const int16_t* ma_coef,
    size_t ma_coef_length,
    size_t vector_length);
651 
// AR filtering of a vector, with coefficients in Q12.
// Input:
//      - data_in            : Input samples
//      - data_out           : Holds the state information in positions
//                               data_out[-order] .. data_out[-1]
//      - coefficients       : Filter coefficients (in Q12)
//      - coefficients_length: Number of coefficients (order+1)
//      - data_length        : Number of samples to filter
// Output:
//      - data_out           : Filtered samples
void WebRtcSpl_FilterARFastQ12(
    const int16_t* data_in,
    int16_t* data_out,
    const int16_t* __restrict coefficients,
    size_t coefficients_length,
    size_t data_length);
667 
668 // The functions (with related pointer) perform a MA down sampling filter
669 // on a vector.
670 // Input:
671 //      - data_in            : Input samples (state in positions
672 //                               data_in[-order] .. data_in[-1])
673 //      - data_in_length     : Number of samples in |data_in| to be filtered.
674 //                               This must be at least
675 //                               |delay| + |factor|*(|data_out_length|-1) + 1
676 //      - data_out_length    : Number of down sampled samples desired
677 //      - coefficients       : Filter coefficients (in Q12)
678 //      - coefficients_length: Number of coefficients (order+1)
679 //      - factor             : Decimation factor
680 //      - delay              : Delay of filter (compensated for in |data_out|)
681 // Output:
682 //      - data_out           : Filtered samples
683 // Return value              : 0 if OK, -1 if |data_in| is too short
//
// DownsampleFast is the function-pointer type for this operation, and
// WebRtcSpl_DownsampleFast is an externally defined pointer of that type.
// NOTE(review): which implementation the pointer refers to is decided outside
// this part of the header - confirm where it is assigned.
684 typedef int (*DownsampleFast)(const int16_t* data_in,
685                               size_t data_in_length,
686                               int16_t* data_out,
687                               size_t data_out_length,
688                               const int16_t* __restrict coefficients,
689                               size_t coefficients_length,
690                               int factor,
691                               size_t delay);
692 extern DownsampleFast WebRtcSpl_DownsampleFast;
// Declared with the same parameter list and return type as the DownsampleFast
// function-pointer type; see the parameter description preceding that typedef.
// NOTE(review): the "C" suffix suggests this is the portable C implementation -
// confirm against the definition.
693 int WebRtcSpl_DownsampleFastC(const int16_t* data_in,
694                               size_t data_in_length,
695                               int16_t* data_out,
696                               size_t data_out_length,
697                               const int16_t* __restrict coefficients,
698                               size_t coefficients_length,
699                               int factor,
700                               size_t delay);
// Only declared when WEBRTC_DETECT_NEON or WEBRTC_HAS_NEON is defined. Same
// parameter list and return type as the DownsampleFast function-pointer type.
701 #if (defined WEBRTC_DETECT_NEON) || (defined WEBRTC_HAS_NEON)
702 int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
703                                  size_t data_in_length,
704                                  int16_t* data_out,
705                                  size_t data_out_length,
706                                  const int16_t* __restrict coefficients,
707                                  size_t coefficients_length,
708                                  int factor,
709                                  size_t delay);
710 #endif
// Only declared when MIPS32_LE is defined. Same parameter list and return type
// as the DownsampleFast function-pointer type.
711 #if defined(MIPS32_LE)
712 int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
713                                   size_t data_in_length,
714                                   int16_t* data_out,
715                                   size_t data_out_length,
716                                   const int16_t* __restrict coefficients,
717                                   size_t coefficients_length,
718                                   int factor,
719                                   size_t delay);
720 #endif
721 
722 // End: Filter operations.
723 
724 // FFT operations
725 
// Complex FFT and inverse FFT on an int16_t buffer |vector|, taking a number of
// |stages| and a |mode| selector and returning an int. The detailed
// descriptions (buffer layout, valid |stages| range, meaning of |mode| and of
// the return value) are in the "WebRtcSpl_ComplexFFT(...)" and
// "WebRtcSpl_ComplexIFFT(...)" comment sections near the end of this file.
726 int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
727 int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
728 
729 // Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
730 // values, and swap elements whose indexes are bit-reverses of each other.
731 //
732 // Input:
733 //      - complex_data  : Complex data buffer containing 2^|stages| real
734 //                        elements interleaved with 2^|stages| imaginary
735 //                        elements: [Re Im Re Im Re Im....]
736 //      - stages        : Number of FFT stages. Must be at least 3 and at most
737 //                        10, since the table WebRtcSpl_kSinTable1024[] is 1024
738 //                        elements long.
739 //
740 // Output:
741 //      - complex_data  : The same complex data buffer, with its elements
//                        swapped as described above.
742 
743 void WebRtcSpl_ComplexBitReverse(int16_t* __restrict complex_data, int stages);
744 
745 // End: FFT operations
746 
747 /************************************************************
748  *
749  * RESAMPLING FUNCTIONS AND THEIR STRUCTS ARE DEFINED BELOW
750  *
751  ************************************************************/
752 
753 /*******************************************************************
754  * resample.c
755  *
756  * Includes the following resampling combinations
757  * 22 kHz -> 16 kHz
758  * 16 kHz -> 22 kHz
759  * 22 kHz ->  8 kHz
760  *  8 kHz -> 22 kHz
761  *
762  ******************************************************************/
763 
764 // State structure for the 22 -> 16 resampler: three int32_t arrays of 8
// elements each.
// NOTE(review): the member names suggest one state array per internal stage
// (22->44, 44->32, 32->16) - confirm in resample.c.
765 typedef struct {
766   int32_t S_22_44[8];
767   int32_t S_44_32[8];
768   int32_t S_32_16[8];
769 } WebRtcSpl_State22khzTo16khz;
770 
// 22 kHz -> 16 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): |tmpmem| looks like caller-provided scratch memory; the
// required lengths of |in|, |out| and |tmpmem| are not stated in this header -
// confirm in resample.c.
771 void WebRtcSpl_Resample22khzTo16khz(const int16_t* in,
772                                     int16_t* out,
773                                     WebRtcSpl_State22khzTo16khz* state,
774                                     int32_t* tmpmem);
775 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample.c.
776 void WebRtcSpl_ResetResample22khzTo16khz(WebRtcSpl_State22khzTo16khz* state);
777 
778 // State structure for the 16 -> 22 resampler: two int32_t arrays of 8
// elements each.
// NOTE(review): the member names suggest one state array per internal stage
// (16->32, 32->22) - confirm in resample.c.
779 typedef struct {
780   int32_t S_16_32[8];
781   int32_t S_32_22[8];
782 } WebRtcSpl_State16khzTo22khz;
783 
// 16 kHz -> 22 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample.c.
784 void WebRtcSpl_Resample16khzTo22khz(const int16_t* in,
785                                     int16_t* out,
786                                     WebRtcSpl_State16khzTo22khz* state,
787                                     int32_t* tmpmem);
788 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample.c.
789 void WebRtcSpl_ResetResample16khzTo22khz(WebRtcSpl_State16khzTo22khz* state);
790 
791 // State structure for the 22 -> 8 resampler: one int32_t array of 16
// elements (S_22_22) and two of 8 elements (S_22_16, S_16_8).
// NOTE(review): the member names suggest one state array per internal stage -
// confirm in resample.c.
792 typedef struct {
793   int32_t S_22_22[16];
794   int32_t S_22_16[8];
795   int32_t S_16_8[8];
796 } WebRtcSpl_State22khzTo8khz;
797 
// 22 kHz -> 8 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample.c.
798 void WebRtcSpl_Resample22khzTo8khz(const int16_t* in, int16_t* out,
799                                    WebRtcSpl_State22khzTo8khz* state,
800                                    int32_t* tmpmem);
801 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample.c.
802 void WebRtcSpl_ResetResample22khzTo8khz(WebRtcSpl_State22khzTo8khz* state);
803 
804 // State structure for the 8 -> 22 resampler: three int32_t arrays of 8
// elements each.
// NOTE(review): the member names suggest one state array per internal stage
// (8->16, 16->11, 11->22) - confirm in resample.c.
805 typedef struct {
806   int32_t S_8_16[8];
807   int32_t S_16_11[8];
808   int32_t S_11_22[8];
809 } WebRtcSpl_State8khzTo22khz;
810 
// 8 kHz -> 22 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample.c.
811 void WebRtcSpl_Resample8khzTo22khz(const int16_t* in, int16_t* out,
812                                    WebRtcSpl_State8khzTo22khz* state,
813                                    int32_t* tmpmem);
814 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample.c.
815 void WebRtcSpl_ResetResample8khzTo22khz(WebRtcSpl_State8khzTo22khz* state);
816 
817 /*******************************************************************
818  * resample_fractional.c
819  * Functions for internal use in the other resample functions
820  *
821  * Includes the following resampling combinations
822  * 48 kHz -> 32 kHz
823  * 32 kHz -> 24 kHz
824  * 44 kHz -> 32 kHz
825  *
826  ******************************************************************/
827 
// Fractional resamplers operating on int32_t buffers: |In| is read-only and
// |Out| is writable. Per the banner above they are meant for internal use by
// the other resample functions.
// NOTE(review): the meaning of |K| (presumably a block count) and the required
// lengths of |In| and |Out| are not stated in this header - confirm in
// resample_fractional.c.
828 void WebRtcSpl_Resample48khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
829 
830 void WebRtcSpl_Resample32khzTo24khz(const int32_t* In, int32_t* Out, size_t K);
831 
832 void WebRtcSpl_Resample44khzTo32khz(const int32_t* In, int32_t* Out, size_t K);
833 
834 /*******************************************************************
835  * resample_48khz.c
836  *
837  * Includes the following resampling combinations
838  * 48 kHz -> 16 kHz
839  * 16 kHz -> 48 kHz
840  * 48 kHz ->  8 kHz
841  *  8 kHz -> 48 kHz
842  *
843  ******************************************************************/
844 
// State structure for the 48 -> 16 resampler: one int32_t array of 16 elements
// (S_48_48) and two of 8 elements (S_48_32, S_32_16).
// NOTE(review): the member names suggest one state array per internal stage -
// confirm in resample_48khz.c.
845 typedef struct {
846   int32_t S_48_48[16];
847   int32_t S_48_32[8];
848   int32_t S_32_16[8];
849 } WebRtcSpl_State48khzTo16khz;
850 
// 48 kHz -> 16 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample_48khz.c.
851 void WebRtcSpl_Resample48khzTo16khz(const int16_t* in, int16_t* out,
852                                     WebRtcSpl_State48khzTo16khz* state,
853                                     int32_t* tmpmem);
854 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample_48khz.c.
855 void WebRtcSpl_ResetResample48khzTo16khz(WebRtcSpl_State48khzTo16khz* state);
856 
// State structure for the 16 -> 48 resampler: three int32_t arrays of 8
// elements each.
// NOTE(review): the member names suggest one state array per internal stage
// (16->32, 32->24, 24->48) - confirm in resample_48khz.c.
857 typedef struct {
858   int32_t S_16_32[8];
859   int32_t S_32_24[8];
860   int32_t S_24_48[8];
861 } WebRtcSpl_State16khzTo48khz;
862 
// 16 kHz -> 48 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample_48khz.c.
863 void WebRtcSpl_Resample16khzTo48khz(const int16_t* in, int16_t* out,
864                                     WebRtcSpl_State16khzTo48khz* state,
865                                     int32_t* tmpmem);
866 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample_48khz.c.
867 void WebRtcSpl_ResetResample16khzTo48khz(WebRtcSpl_State16khzTo48khz* state);
868 
// State structure for the 48 -> 8 resampler: int32_t arrays S_48_24[8],
// S_24_24[16], S_24_16[8] and S_16_8[8].
// NOTE(review): the member names suggest one state array per internal stage -
// confirm in resample_48khz.c.
869 typedef struct {
870   int32_t S_48_24[8];
871   int32_t S_24_24[16];
872   int32_t S_24_16[8];
873   int32_t S_16_8[8];
874 } WebRtcSpl_State48khzTo8khz;
875 
// 48 kHz -> 8 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample_48khz.c.
876 void WebRtcSpl_Resample48khzTo8khz(const int16_t* in, int16_t* out,
877                                    WebRtcSpl_State48khzTo8khz* state,
878                                    int32_t* tmpmem);
879 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample_48khz.c.
880 void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz* state);
881 
// State structure for the 8 -> 48 resampler: four int32_t arrays of 8 elements
// each.
// NOTE(review): the member names suggest one state array per internal stage
// (8->16, 16->12, 12->24, 24->48) - confirm in resample_48khz.c.
882 typedef struct {
883   int32_t S_8_16[8];
884   int32_t S_16_12[8];
885   int32_t S_12_24[8];
886   int32_t S_24_48[8];
887 } WebRtcSpl_State8khzTo48khz;
888 
// 8 kHz -> 48 kHz resampling entry point. |in| is read-only; |out|, |state|
// and |tmpmem| are writable.
// NOTE(review): required buffer lengths and the role of |tmpmem| (apparently
// scratch memory) are not stated in this header - confirm in resample_48khz.c.
889 void WebRtcSpl_Resample8khzTo48khz(const int16_t* in, int16_t* out,
890                                    WebRtcSpl_State8khzTo48khz* state,
891                                    int32_t* tmpmem);
892 
// NOTE(review): presumably (re)initializes |state| for the resampler above -
// confirm in resample_48khz.c.
893 void WebRtcSpl_ResetResample8khzTo48khz(WebRtcSpl_State8khzTo48khz* state);
894 
895 /*******************************************************************
896  * resample_by_2.c
897  *
898  * Includes down and up sampling by a factor of two.
899  *
900  ******************************************************************/
901 
// Down sampling by a factor of two (see the resample_by_2.c banner above).
// |in| is read-only; |out| and |filtState| are writable.
// NOTE(review): |len| is presumably the number of samples in |in| and
// |filtState| the filter state kept between calls; its required size is not
// stated in this header - confirm in resample_by_2.c.
902 void WebRtcSpl_DownsampleBy2(const int16_t* in, size_t len,
903                              int16_t* out, int32_t* filtState);
904 
// Up sampling by a factor of two; same parameter list as
// WebRtcSpl_DownsampleBy2.
// NOTE(review): the same open questions about |len| and |filtState| apply -
// confirm in resample_by_2.c.
905 void WebRtcSpl_UpsampleBy2(const int16_t* in, size_t len,
906                            int16_t* out, int32_t* filtState);
907 
908 /************************************************************
909  * END OF RESAMPLING FUNCTIONS
910  ************************************************************/
// QMF analysis: takes |in_data| (read-only, |in_data_length| samples) and
// writes |low_band| and |high_band|; |filter_state1| and |filter_state2| are
// writable filter states. A detailed description is given in the
// "WebRtcSpl_AnalysisQMF(...)" comment section near the end of this file.
911 void WebRtcSpl_AnalysisQMF(const int16_t* in_data,
912                            size_t in_data_length,
913                            int16_t* low_band,
914                            int16_t* high_band,
915                            int32_t* filter_state1,
916                            int32_t* filter_state2);
// QMF synthesis: takes |low_band| and |high_band| (both read-only) and writes
// |out_data|; |filter_state1| and |filter_state2| are writable filter states.
// NOTE(review): |band_length| is presumably the number of samples in each
// band - confirm against the function description and definition.
917 void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
918                             const int16_t* high_band,
919                             size_t band_length,
920                             int16_t* out_data,
921                             int32_t* filter_state1,
922                             int32_t* filter_state2);
923 
924 #ifdef __cplusplus
925 }
926 #endif  // __cplusplus
927 #endif  // WEBRTC_SPL_SIGNAL_PROCESSING_LIBRARY_H_
928 
929 //
930 // WebRtcSpl_AddSatW16(...)
931 // WebRtcSpl_AddSatW32(...)
932 //
933 // Returns the result of a saturated 16-bit, respectively 32-bit, addition of
934 // the numbers specified by the |var1| and |var2| parameters.
935 //
936 // Input:
937 //      - var1      : Input variable 1
938 //      - var2      : Input variable 2
939 //
940 // Return value     : Added and saturated value
941 //
942 
943 //
944 // WebRtcSpl_SubSatW16(...)
945 // WebRtcSpl_SubSatW32(...)
946 //
947 // Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
948 // of the numbers specified by the |var1| and |var2| parameters.
949 //
950 // Input:
951 //      - var1      : Input variable 1
952 //      - var2      : Input variable 2
953 //
954 // Returned value   : Subtracted and saturated value
955 //
956 
957 //
958 // WebRtcSpl_GetSizeInBits(...)
959 //
960 // Returns the # of bits that are needed at the most to represent the number
961 // specified by the |value| parameter.
962 //
963 // Input:
964 //      - value     : Input value
965 //
966 // Return value     : Number of bits needed to represent |value|
967 //
968 
969 //
970 // WebRtcSpl_NormW32(...)
971 //
972 // Norm returns the # of left shifts required to 32-bit normalize the 32-bit
973 // signed number specified by the |value| parameter.
974 //
975 // Input:
976 //      - value     : Input value
977 //
978 // Return value     : Number of bit shifts needed to 32-bit normalize |value|
979 //
980 
981 //
982 // WebRtcSpl_NormW16(...)
983 //
984 // Norm returns the # of left shifts required to 16-bit normalize the 16-bit
985 // signed number specified by the |value| parameter.
986 //
987 // Input:
988 //      - value     : Input value
989 //
990 // Return value     : Number of bit shifts needed to 16-bit normalize |value|
991 //
992 
993 //
994 // WebRtcSpl_NormU32(...)
995 //
996 // Norm returns the # of left shifts required to 32-bit normalize the unsigned
997 // 32-bit number specified by the |value| parameter.
998 //
999 // Input:
1000 //      - value     : Input value
1001 //
1002 // Return value     : Number of bit shifts needed to 32-bit normalize |value|
1003 //
1004 
1005 //
1006 // WebRtcSpl_GetScalingSquare(...)
1007 //
1008 // Returns the # of bits required to scale the samples specified in the
1009 // |in_vector| parameter so that, if the squares of the samples are added the
1010 // # of times specified by the |times| parameter, the 32-bit addition will not
1011 // overflow (result in int32_t).
1012 //
1013 // Input:
1014 //      - in_vector         : Input vector to check scaling on
1015 //      - in_vector_length  : Samples in |in_vector|
1016 //      - times             : Number of additions to be performed
1017 //
1018 // Return value             : Number of right bit shifts needed to avoid
1019 //                            overflow in the addition calculation
1020 //
1021 
1022 //
1023 // WebRtcSpl_MemSetW16(...)
1024 //
1025 // Sets all the values in the int16_t vector |vector| of length
1026 // |vector_length| to the specified value |set_value|
1027 //
1028 // Input:
1029 //      - vector        : Pointer to the int16_t vector
1030 //      - set_value     : Value specified
1031 //      - vector_length : Length of vector
1032 //
1033 
1034 //
1035 // WebRtcSpl_MemSetW32(...)
1036 //
1037 // Sets all the values in the int32_t vector |vector| of length
1038 // |vector_length| to the specified value |set_value|
1039 //
1040 // Input:
1041 //      - vector        : Pointer to the int32_t vector
1042 //      - set_value     : Value specified
1043 //      - vector_length : Length of vector
1044 //
1045 
1046 //
1047 // WebRtcSpl_MemCpyReversedOrder(...)
1048 //
1049 // Copies all the values from the source int16_t vector |in_vector| to a
1050 // destination int16_t vector |out_vector|. It is done in reversed order,
1051 // meaning that the first sample of |in_vector| is copied to the last sample of
1052 // the |out_vector|. The procedure continues until the last sample of
1053 // |in_vector| has been copied to the first sample of |out_vector|. This
1054 // creates a reversed vector. Used in e.g. prediction in iLBC.
1055 //
1056 // Input:
1057 //      - in_vector     : Pointer to the first sample in an int16_t vector
1058 //                        of length |vector_length|
1059 //      - vector_length : Number of elements to copy
1060 //
1061 // Output:
1062 //      - out_vector    : Pointer to the last sample in an int16_t vector
1063 //                        of length |vector_length|
1064 //
1065 
1066 //
1067 // WebRtcSpl_CopyFromEndW16(...)
1068 //
1069 // Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
1070 // to the vector |out_vector|.
1071 //
1072 // Input:
1073 //      - in_vector         : Input vector
1074 //      - in_vector_length  : Number of samples in |in_vector|
1075 //      - samples           : Number of samples to extract (from right side)
1076 //                            from |in_vector|
1077 //
1078 // Output:
1079 //      - out_vector        : Vector with the requested samples
1080 //
1081 
1082 //
1083 // WebRtcSpl_ZerosArrayW16(...)
1084 // WebRtcSpl_ZerosArrayW32(...)
1085 //
1086 // Inserts the value "zero" in all positions of a w16 and a w32 vector
1087 // respectively.
1088 //
1089 // Input:
1090 //      - vector_length : Number of samples in vector
1091 //
1092 // Output:
1093 //      - vector        : Vector containing all zeros
1094 //
1095 
1096 //
1097 // WebRtcSpl_VectorBitShiftW16(...)
1098 // WebRtcSpl_VectorBitShiftW32(...)
1099 //
1100 // Bit shifts all the values in a vector up or downwards. Different calls for
1101 // int16_t and int32_t vectors respectively.
1102 //
1103 // Input:
1104 //      - vector_length : Length of vector
1105 //      - in_vector     : Pointer to the vector that should be bit shifted
1106 //      - right_shifts  : Number of right bit shifts (negative value gives left
1107 //                        shifts)
1108 //
1109 // Output:
1110 //      - out_vector    : Pointer to the result vector (can be the same as
1111 //                        |in_vector|)
1112 //
1113 
1114 //
1115 // WebRtcSpl_VectorBitShiftW32ToW16(...)
1116 //
1117 // Bit shifts all the values in an int32_t vector up or downwards and
1118 // stores the result as an int16_t vector. The function will saturate the
1119 // signal if needed, before storing in the output vector.
1120 //
1121 // Input:
1122 //      - vector_length : Length of vector
1123 //      - in_vector     : Pointer to the vector that should be bit shifted
1124 //      - right_shifts  : Number of right bit shifts (negative value gives left
1125 //                        shifts)
1126 //
1127 // Output:
1128 //      - out_vector    : Pointer to the result vector (can be the same as
1129 //                        |in_vector|)
1130 //
1131 
1132 //
1133 // WebRtcSpl_ScaleVector(...)
1134 //
1135 // Performs the vector operation:
1136 //  out_vector[k] = (gain*in_vector[k])>>right_shifts
1137 //
1138 // Input:
1139 //      - in_vector     : Input vector
1140 //      - gain          : Scaling gain
1141 //      - vector_length : Elements in the |in_vector|
1142 //      - right_shifts  : Number of right bit shifts applied
1143 //
1144 // Output:
1145 //      - out_vector    : Output vector (can be the same as |in_vector|)
1146 //
1147 
1148 //
1149 // WebRtcSpl_ScaleVectorWithSat(...)
1150 //
1151 // Performs the vector operation:
1152 //  out_vector[k] = SATURATE( (gain*in_vector[k])>>right_shifts )
1153 //
1154 // Input:
1155 //      - in_vector     : Input vector
1156 //      - gain          : Scaling gain
1157 //      - vector_length : Elements in the |in_vector|
1158 //      - right_shifts  : Number of right bit shifts applied
1159 //
1160 // Output:
1161 //      - out_vector    : Output vector (can be the same as |in_vector|)
1162 //
1163 
1164 //
1165 // WebRtcSpl_ScaleAndAddVectors(...)
1166 //
1167 // Performs the vector operation:
1168 //  out_vector[k] = (gain1*in_vector1[k])>>right_shifts1
1169 //                  + (gain2*in_vector2[k])>>right_shifts2
1170 //
1171 // Input:
1172 //      - in_vector1    : Input vector 1
1173 //      - gain1         : Gain to be used for vector 1
1174 //      - right_shifts1 : Right bit shift to be used for vector 1
1175 //      - in_vector2    : Input vector 2
1176 //      - gain2         : Gain to be used for vector 2
1177 //      - right_shifts2 : Right bit shift to be used for vector 2
1178 //      - vector_length : Elements in the input vectors
1179 //
1180 // Output:
1181 //      - out_vector    : Output vector
1182 //
1183 
1184 //
1185 // WebRtcSpl_ReverseOrderMultArrayElements(...)
1186 //
1187 // Performs the vector operation:
1188 //  out_vector[n] = (in_vector[n]*window[-n])>>right_shifts
1189 //
1190 // Input:
1191 //      - in_vector     : Input vector
1192 //      - window        : Window vector (should be reversed). The pointer
1193 //                        should be set to the last value in the vector
1194 //      - right_shifts  : Number of right bit shifts to be applied after the
1195 //                        multiplication
1196 //      - vector_length : Number of elements in |in_vector|
1197 //
1198 // Output:
1199 //      - out_vector    : Output vector (can be same as |in_vector|)
1200 //
1201 
1202 //
1203 // WebRtcSpl_ElementwiseVectorMult(...)
1204 //
1205 // Performs the vector operation:
1206 //  out_vector[n] = (in_vector[n]*window[n])>>right_shifts
1207 //
1208 // Input:
1209 //      - in_vector     : Input vector
1210 //      - window        : Window vector.
1211 //      - right_shifts  : Number of right bit shifts to be applied after the
1212 //                        multiplication
1213 //      - vector_length : Number of elements in |in_vector|
1214 //
1215 // Output:
1216 //      - out_vector    : Output vector (can be same as |in_vector|)
1217 //
1218 
1219 //
1220 // WebRtcSpl_AddVectorsAndShift(...)
1221 //
1222 // Performs the vector operation:
1223 //  out_vector[k] = (in_vector1[k] + in_vector2[k])>>right_shifts
1224 //
1225 // Input:
1226 //      - in_vector1    : Input vector 1
1227 //      - in_vector2    : Input vector 2
1228 //      - right_shifts  : Number of right bit shifts to be applied after the
1229 //                        addition
1230 //      - vector_length : Number of elements in |in_vector1| and |in_vector2|
1231 //
1232 // Output:
1233 //      - out_vector    : Output vector (can be same as |in_vector1|)
1234 //
1235 
1236 //
1237 // WebRtcSpl_AddAffineVectorToVector(...)
1238 //
1239 // Adds an affine transformed vector to another vector |out_vector|, i.e,
1240 // performs
1241 //  out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
1242 //
1243 // Input:
1244 //      - in_vector     : Input vector
1245 //      - gain          : Gain value, used to multiply the in vector with
1246 //      - add_constant  : Constant value to add (usually 1<<(right_shifts-1),
1247 //                        but others can be used as well)
1248 //      - right_shifts  : Number of right bit shifts (0-16)
1249 //      - vector_length : Number of samples in |in_vector| and |out_vector|
1250 //
1251 // Output:
1252 //      - out_vector    : Vector with the output
1253 //
1254 
1255 //
1256 // WebRtcSpl_AffineTransformVector(...)
1257 //
1258 // Affine transforms a vector, i.e, performs
1259 //  out_vector[k] = (in_vector[k]*gain+add_constant)>>right_shifts
1260 //
1261 // Input:
1262 //      - in_vector     : Input vector
1263 //      - gain          : Gain value, used to multiply the in vector with
1264 //      - add_constant  : Constant value to add (usually 1<<(right_shifts-1),
1265 //                        but others can be used as well)
1266 //      - right_shifts  : Number of right bit shifts (0-16)
1267 //      - vector_length : Number of samples in |in_vector| and |out_vector|
1268 //
1269 // Output:
1270 //      - out_vector    : Vector with the output
1271 //
1272 
1273 //
1274 // WebRtcSpl_IncreaseSeed(...)
1275 //
1276 // Increases the seed (and returns the new value)
1277 //
1278 // Input:
1279 //      - seed      : Seed for random calculation
1280 //
1281 // Output:
1282 //      - seed      : Updated seed value
1283 //
1284 // Return value     : The new seed value
1285 //
1286 
1287 //
1288 // WebRtcSpl_RandU(...)
1289 //
1290 // Produces a uniformly distributed value in the int16_t range
1291 //
1292 // Input:
1293 //      - seed      : Seed for random calculation
1294 //
1295 // Output:
1296 //      - seed      : Updated seed value
1297 //
1298 // Return value     : Uniformly distributed value in the range
1299 //                    [Word16_MIN...Word16_MAX]
1300 //
1301 
1302 //
1303 // WebRtcSpl_RandN(...)
1304 //
1305 // Produces a normally distributed value in the int16_t range
1306 //
1307 // Input:
1308 //      - seed      : Seed for random calculation
1309 //
1310 // Output:
1311 //      - seed      : Updated seed value
1312 //
1313 // Return value     : N(0,1) value in the Q13 domain
1314 //
1315 
1316 //
1317 // WebRtcSpl_RandUArray(...)
1318 //
1319 // Produces a uniformly distributed vector with elements in the int16_t
1320 // range
1321 //
1322 // Input:
1323 //      - vector_length : Samples wanted in the vector
1324 //      - seed          : Seed for random calculation
1325 //
1326 // Output:
1327 //      - vector        : Vector with the uniform values
1328 //      - seed          : Updated seed value
1329 //
1330 // Return value         : Number of samples in vector, i.e., |vector_length|
1331 //
1332 
1333 //
1334 // WebRtcSpl_Sqrt(...)
1335 //
1336 // Returns the square root of the input value |value|. The precision of this
1337 // function is integer precision, i.e., sqrt(8) gives 2 as answer.
1338 // If |value| is a negative number then 0 is returned.
1339 //
1340 // Algorithm:
1341 //
1342 // A sixth order Taylor Series expansion (six terms, up to (x/2)^5) is used
1343 // here to compute the square root of a number y, y^0.5 = (1+x)^0.5
1344 // where
1345 // x = y-1, and
1346 // (1+x)^0.5 ~= 1+(x/2)-0.5*(x/2)^2+0.5*(x/2)^3-0.625*(x/2)^4+0.875*(x/2)^5
1347 // 0.5 <= x < 1
1348 //
1349 // Input:
1350 //      - value     : Value to calculate sqrt of
1351 //
1352 // Return value     : Result of the sqrt calculation
1353 //
1354 
1355 //
1356 // WebRtcSpl_SqrtFloor(...)
1357 //
1358 // Returns the square root of the input value |value|. The precision of this
1359 // function is rounding down integer precision, i.e., sqrt(8) gives 2 as answer.
1360 // If |value| is a negative number then 0 is returned.
1361 //
1362 // Algorithm:
1363 //
1364 // An iterative 4 cycle/bit routine
1365 //
1366 // Input:
1367 //      - value     : Value to calculate sqrt of
1368 //
1369 // Return value     : Result of the sqrt calculation
1370 //
1371 
1372 //
1373 // WebRtcSpl_DivU32U16(...)
1374 //
1375 // Divides a uint32_t |num| by a uint16_t |den|.
1376 //
1377 // If |den|==0, (uint32_t)0xFFFFFFFF is returned.
1378 //
1379 // Input:
1380 //      - num       : Numerator
1381 //      - den       : Denominator
1382 //
1383 // Return value     : Result of the division (as a uint32_t), i.e., the
1384 //                    integer part of num/den.
1385 //
1386 
1387 //
1388 // WebRtcSpl_DivW32W16(...)
1389 //
1390 // Divides an int32_t |num| by an int16_t |den|.
1391 //
1392 // If |den|==0, (int32_t)0x7FFFFFFF is returned.
1393 //
1394 // Input:
1395 //      - num       : Numerator
1396 //      - den       : Denominator
1397 //
1398 // Return value     : Result of the division (as a int32_t), i.e., the
1399 //                    integer part of num/den.
1400 //
1401 
1402 //
1403 // WebRtcSpl_DivW32W16ResW16(...)
1404 //
1405 // Divides an int32_t |num| by an int16_t |den|, assuming that the
1406 // result is less than 32768, otherwise an unpredictable result will occur.
1407 //
1408 // If |den|==0, (int16_t)0x7FFF is returned.
1409 //
1410 // Input:
1411 //      - num       : Numerator
1412 //      - den       : Denominator
1413 //
1414 // Return value     : Result of the division (as a int16_t), i.e., the
1415 //                    integer part of num/den.
1416 //
1417 
1418 //
1419 // WebRtcSpl_DivResultInQ31(...)
1420 //
1421 // Divides an int32_t |num| by an int16_t |den|, assuming that the
1422 // absolute value of the denominator is larger than the numerator, otherwise
1423 // an unpredictable result will occur.
1424 //
1425 // Input:
1426 //      - num       : Numerator
1427 //      - den       : Denominator
1428 //
1429 // Return value     : Result of the division in Q31.
1430 //
1431 
1432 //
1433 // WebRtcSpl_DivW32HiLow(...)
1434 //
1435 // Divides an int32_t |num| by a denominator in hi, low format. The
1436 // absolute value of the denominator has to be larger than (or equal to) the
1437 // numerator.
1438 //
1439 // Input:
1440 //      - num       : Numerator
1441 //      - den_hi    : High part of denominator
1442 //      - den_low   : Low part of denominator
1443 //
1444 // Return value     : Divided value in Q31
1445 //
1446 
1447 //
1448 // WebRtcSpl_Energy(...)
1449 //
1450 // Calculates the energy of a vector
1451 //
1452 // Input:
1453 //      - vector        : Vector which the energy should be calculated on
1454 //      - vector_length : Number of samples in vector
1455 //
1456 // Output:
1457 //      - scale_factor  : Number of left bit shifts needed to get the physical
1458 //                        energy value, i.e, to get the Q0 value
1459 //
1460 // Return value         : Energy value in Q(-|scale_factor|)
1461 //
1462 
1463 //
1464 // WebRtcSpl_FilterAR(...)
1465 //
1466 // Performs a 32-bit AR filtering on a vector in Q12
1467 //
1468 // Input:
1469 //  - ar_coef                   : AR-coefficient vector (values in Q12),
1470 //                                ar_coef[0] must be 4096.
1471 //  - ar_coef_length            : Number of coefficients in |ar_coef|.
1472 //  - in_vector                 : Vector to be filtered.
1473 //  - in_vector_length          : Number of samples in |in_vector|.
1474 //  - filter_state              : Current state (higher part) of the filter.
1475 //  - filter_state_length       : Length (in samples) of |filter_state|.
1476 //  - filter_state_low          : Current state (lower part) of the filter.
1477 //  - filter_state_low_length   : Length (in samples) of |filter_state_low|.
1478 //  - out_vector_low_length     : Maximum length (in samples) of
1479 //                                |out_vector_low|.
1480 //
1481 // Output:
1482 //  - filter_state              : Updated state (upper part) vector.
1483 //  - filter_state_low          : Updated state (lower part) vector.
1484 //  - out_vector                : Vector containing the upper part of the
1485 //                                filtered values.
1486 //  - out_vector_low            : Vector containing the lower part of the
1487 //                                filtered values.
1488 //
1489 // Return value                 : Number of samples in the |out_vector|.
1490 //
1491 
1492 //
1493 // WebRtcSpl_ComplexIFFT(...)
1494 //
1495 // Complex Inverse FFT
1496 //
1497 // Computes an inverse complex 2^|stages|-point FFT on the input vector, which
1498 // is in bit-reversed order. The original content of the vector is destroyed in
1499 // the process, since the input is overwritten by the output, normal-ordered,
1500 // FFT vector. With X as the input complex vector, y as the output complex
1501 // vector and with M = 2^|stages|, the following is computed:
1502 //
1503 //        M-1
1504 // y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
1505 //        i=0
1506 //
1507 // The implementations are optimized for speed, not for code size. It uses the
1508 // decimation-in-time algorithm with radix-2 butterfly technique.
1509 //
1510 // Input:
1511 //      - vector    : In pointer to complex vector containing 2^|stages|
1512 //                    real elements interleaved with 2^|stages| imaginary
1513 //                    elements.
1514 //                    [ReImReImReIm....]
1515 //                    The elements are in Q(-scale) domain, see more on Return
1516 //                    Value below.
1517 //
1518 //      - stages    : Number of FFT stages. Must be at least 3 and at most 10,
1519 //                    since the table WebRtcSpl_kSinTable1024[] is 1024
1520 //                    elements long.
1521 //
1522 //      - mode      : This parameter lets the user choose how the FFT
1523 //                    should work.
1524 //                    mode==0: Low-complexity and Low-accuracy mode
1525 //                    mode==1: High-complexity and High-accuracy mode
1526 //
1527 // Output:
1528 //      - vector    : Out pointer to the FFT vector (the same as input).
1529 //
1530 // Return Value     : The scale value that tells the number of left bit shifts
1531 //                    that the elements in the |vector| should be shifted with
1532 //                    in order to get Q0 values, i.e. the physically correct
1533 //                    values. The scale parameter is always 0 or positive,
1534 //                    except if N>1024 (|stages|>10), which returns a scale
1535 //                    value of -1, indicating error.
1536 //
1537 
1538 //
1539 // WebRtcSpl_ComplexFFT(...)
1540 //
1541 // Complex FFT
1542 //
1543 // Computes a complex 2^|stages|-point FFT on the input vector, which is in
1544 // bit-reversed order. The original content of the vector is destroyed in
1545 // the process, since the input is overwritten by the output, normal-ordered,
1546 // FFT vector. With x as the input complex vector, Y as the output complex
1547 // vector and with M = 2^|stages|, the following is computed:
1548 //
1549 //              M-1
1550 // Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
1551 //              i=0
1552 //
1553 // The implementations are optimized for speed, not for code size. It uses the
1554 // decimation-in-time algorithm with radix-2 butterfly technique.
1555 //
1556 // This routine prevents overflow by scaling by 2 before each FFT stage. This is
1557 // a fixed scaling, for proper normalization - there will be log2(n) passes, so
1558 // this results in an overall factor of 1/n, distributed to maximize arithmetic
1559 // accuracy.
1560 //
1561 // Input:
1562 //      - vector    : In pointer to complex vector containing 2^|stages| real
1563 //                    elements interleaved with 2^|stages| imaginary elements.
1564 //                    [ReImReImReIm....]
1565 //                    The output is in the Q0 domain.
1566 //
1567 //      - stages    : Number of FFT stages. Must be at least 3 and at most 10,
1568 //                    since the table WebRtcSpl_kSinTable1024[] is 1024
1569 //                    elements long.
1570 //
1571 //      - mode      : This parameter lets the user choose how the FFT
1572 //                    should work.
1573 //                    mode==0: Low-complexity and Low-accuracy mode
1574 //                    mode==1: High-complexity and High-accuracy mode
1575 //
1576 // Output:
1577 //      - vector    : The output FFT vector is in the Q0 domain.
1578 //
1579 // Return value     : The scale value is always 0, except if N>1024
1580 //                    (|stages|>10), which returns -1, indicating error.
1581 //
1582 
1583 //
1584 // WebRtcSpl_AnalysisQMF(...)
1585 //
1586 // Splits a 0-2*F Hz signal into two sub bands: 0-F Hz and F-2*F Hz. The
1587 // current version has F = 8000 Hz; therefore, a super-wideband audio signal
1588 // is split into a lower band (0-8 kHz) and an upper band (8-16 kHz).
1589 //
1590 // Input:
1591 //      - in_data       : Super-wideband speech signal, 320 samples (10 ms)
1592 //
1593 // Input & Output:
1594 //      - filter_state1 : Filter state for first All-pass filter
1595 //      - filter_state2 : Filter state for second All-pass filter
1596 //
1597 // Output:
1598 //      - low_band      : Lower-band signal 0-8 kHz band, 160 samples (10 ms)
1599 //      - high_band     : Upper-band signal 8-16 kHz band (flipped in frequency
1600 //                        domain), 160 samples (10 ms)
1601 //
1602 
1603 //
1604 // WebRtcSpl_SynthesisQMF(...)
1605 //
1606 // Combines the two sub bands (0-F and F-2*F Hz) into a signal of 0-2*F
1607 // Hz, (current version has F = 8000 Hz). So the filter combines lower-band
1608 // (0-8 kHz) and upper-band (8-16 kHz) channels to obtain super-wideband 0-16
1609 // kHz audio.
1610 //
1611 // Input:
1612 //      - low_band      : The signal with the 0-8 kHz band, 160 samples (10 ms)
1613 //      - high_band     : The signal with the 8-16 kHz band, 160 samples (10 ms)
1614 //
1615 // Input & Output:
1616 //      - filter_state1 : Filter state for first All-pass filter
1617 //      - filter_state2 : Filter state for second All-pass filter
1618 //
1619 // Output:
1620 //      - out_data      : Super-wideband speech signal, 0-16 kHz
1621 //
1622 
1623 // int16_t WebRtcSpl_SatW32ToW16(...)
1624 //
1625 // This function saturates a 32-bit word into a 16-bit word.
1626 //
1627 // Input:
1628 //      - value32   : The value of a 32-bit word.
1629 //
1630 // Output:
1631 //      - out16     : the saturated 16-bit word.
1632 //
1633 
1634 // int32_t WebRtc_MulAccumW16(...)
1635 //
1636 // This function multiplies a 16-bit word by a 16-bit word, and accumulates
1637 // the product into a 32-bit integer.
1638 //
1639 // Input:
1640 //      - a    : The value of the first 16-bit word.
1641 //      - b    : The value of the second 16-bit word.
1642 //      - c    : The value of a 32-bit integer.
1643 //
1644 // Return Value: The value of a * b + c.
1645 //
1646