1 /* Copyright (c) 2014, Cisco Systems, INC
2    Written by XiangMingZhu WeiZhou MinPeng YanWang
3 
4    Redistribution and use in source and binary forms, with or without
5    modification, are permitted provided that the following conditions
6    are met:
7 
8    - Redistributions of source code must retain the above copyright
9    notice, this list of conditions and the following disclaimer.
10 
11    - Redistributions in binary form must reproduce the above copyright
12    notice, this list of conditions and the following disclaimer in the
13    documentation and/or other materials provided with the distribution.
14 
15    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
16    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
17    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
18    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
19    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
23    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 
28 #if defined(HAVE_CONFIG_H)
29 #include "config.h"
30 #endif
31 
32 #include "x86/x86cpu.h"
33 #include "celt_lpc.h"
34 #include "pitch.h"
35 #include "pitch_sse.h"
36 #include "vq.h"
37 
38 #if defined(OPUS_HAVE_RTCD)
39 
40 # if defined(FIXED_POINT)
41 
42 #if defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)
43 
44 void (*const CELT_FIR_IMPL[OPUS_ARCHMASK + 1])(
45          const opus_val16 *x,
46          const opus_val16 *num,
47          opus_val16       *y,
48          int              N,
49          int              ord,
50          int              arch
51 ) = {
52   celt_fir_c,                /* non-sse */
53   celt_fir_c,
54   celt_fir_c,
55   MAY_HAVE_SSE4_1(celt_fir), /* sse4.1  */
56   MAY_HAVE_SSE4_1(celt_fir)  /* avx  */
57 };
58 
59 void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
60          const opus_val16 *x,
61          const opus_val16 *y,
62          opus_val32       sum[4],
63          int              len
64 ) = {
65   xcorr_kernel_c,                /* non-sse */
66   xcorr_kernel_c,
67   xcorr_kernel_c,
68   MAY_HAVE_SSE4_1(xcorr_kernel), /* sse4.1  */
69   MAY_HAVE_SSE4_1(xcorr_kernel)  /* avx  */
70 };
71 
72 #endif
73 
/* Fixed-point function-pointer table for celt_inner_prod().
   It is emitted when either
     - SSE4.1 may be available but is not presumed, or
     - SSE4.1 is not built at all, and SSE2 may be available but is not
       presumed.
   The second clause must test OPUS_X86_MAY_HAVE_SSE4_1 (the spelling used by
   the other guards in this file); a misspelled macro name is never defined,
   which would make the "SSE4.1 is not built" test always true. */
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) ||  \
 (!defined(OPUS_X86_MAY_HAVE_SSE4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))

/* Slots 0 and 1 hold the plain C routine, slot 2 goes through
   MAY_HAVE_SSE2(), slots 3 and 4 through MAY_HAVE_SSE4_1().  Both macros
   are defined outside this file. */
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
         const opus_val16 *x,
         const opus_val16 *y,
         int              N
) = {
  celt_inner_prod_c,                /* [0] non-sse */
  celt_inner_prod_c,                /* [1] */
  MAY_HAVE_SSE2(celt_inner_prod),   /* [2] */
  MAY_HAVE_SSE4_1(celt_inner_prod), /* [3] sse4.1  */
  MAY_HAVE_SSE4_1(celt_inner_prod)  /* [4] avx  */
};

#endif
90 
91 # else
92 
93 #if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
94 
95 void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
96          const opus_val16 *x,
97          const opus_val16 *y,
98          opus_val32       sum[4],
99          int              len
100 ) = {
101   xcorr_kernel_c,                /* non-sse */
102   MAY_HAVE_SSE(xcorr_kernel),
103   MAY_HAVE_SSE(xcorr_kernel),
104   MAY_HAVE_SSE(xcorr_kernel),
105   MAY_HAVE_SSE(xcorr_kernel)
106 };
107 
108 opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
109          const opus_val16 *x,
110          const opus_val16 *y,
111          int              N
112 ) = {
113   celt_inner_prod_c,                /* non-sse */
114   MAY_HAVE_SSE(celt_inner_prod),
115   MAY_HAVE_SSE(celt_inner_prod),
116   MAY_HAVE_SSE(celt_inner_prod),
117   MAY_HAVE_SSE(celt_inner_prod)
118 };
119 
120 void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
121                     const opus_val16 *x,
122                     const opus_val16 *y01,
123                     const opus_val16 *y02,
124                     int               N,
125                     opus_val32       *xy1,
126                     opus_val32       *xy2
127 ) = {
128   dual_inner_prod_c,                /* non-sse */
129   MAY_HAVE_SSE(dual_inner_prod),
130   MAY_HAVE_SSE(dual_inner_prod),
131   MAY_HAVE_SSE(dual_inner_prod),
132   MAY_HAVE_SSE(dual_inner_prod)
133 };
134 
135 void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
136               opus_val32 *y,
137               opus_val32 *x,
138               int         T,
139               int         N,
140               opus_val16  g10,
141               opus_val16  g11,
142               opus_val16  g12
143 ) = {
144   comb_filter_const_c,                /* non-sse */
145   MAY_HAVE_SSE(comb_filter_const),
146   MAY_HAVE_SSE(comb_filter_const),
147   MAY_HAVE_SSE(comb_filter_const),
148   MAY_HAVE_SSE(comb_filter_const)
149 };
150 
151 
152 #endif
153 
154 #if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)
155 opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
156       celt_norm *_X, int *iy, int K, int N, int arch
157 ) = {
158   op_pvq_search_c,                /* non-sse */
159   op_pvq_search_c,
160   MAY_HAVE_SSE2(op_pvq_search),
161   MAY_HAVE_SSE2(op_pvq_search),
162   MAY_HAVE_SSE2(op_pvq_search)
163 };
164 #endif
165 
166 #endif
167 #endif
168