1 /*
2  *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "common_audio/signal_processing/include/signal_processing_library.h"
12 
13 // Version of WebRtcSpl_DownsampleFast() for MIPS platforms.
WebRtcSpl_DownsampleFast_mips(const int16_t * data_in,size_t data_in_length,int16_t * data_out,size_t data_out_length,const int16_t * __restrict coefficients,size_t coefficients_length,int factor,size_t delay)14 int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
15                                   size_t data_in_length,
16                                   int16_t* data_out,
17                                   size_t data_out_length,
18                                   const int16_t* __restrict coefficients,
19                                   size_t coefficients_length,
20                                   int factor,
21                                   size_t delay) {
22   int i;
23   int j;
24   int k;
25   int32_t out_s32 = 0;
26   size_t endpos = delay + factor * (data_out_length - 1) + 1;
27 
28   int32_t  tmp1, tmp2, tmp3, tmp4, factor_2;
29   int16_t* p_coefficients;
30   int16_t* p_data_in;
31   int16_t* p_data_in_0 = (int16_t*)&data_in[delay];
32   int16_t* p_coefficients_0 = (int16_t*)&coefficients[0];
33 #if !defined(MIPS_DSP_R1_LE)
34   int32_t max_16 = 0x7FFF;
35   int32_t min_16 = 0xFFFF8000;
36 #endif  // #if !defined(MIPS_DSP_R1_LE)
37 
38   // Return error if any of the running conditions doesn't meet.
39   if (data_out_length == 0 || coefficients_length == 0
40                            || data_in_length < endpos) {
41     return -1;
42   }
43 #if defined(MIPS_DSP_R2_LE)
44   __asm __volatile (
45     ".set        push                                                \n\t"
46     ".set        noreorder                                           \n\t"
47     "subu        %[i],            %[endpos],       %[delay]          \n\t"
48     "sll         %[factor_2],     %[factor],       1                 \n\t"
49    "1:                                                               \n\t"
50     "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
51     "mult        $zero,           $zero                              \n\t"
52     "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
53     "sra         %[j],            %[coef_length],  2                 \n\t"
54     "beq         %[j],            $zero,           3f                \n\t"
55     " andi       %[k],            %[coef_length],  3                 \n\t"
56    "2:                                                               \n\t"
57     "lwl         %[tmp1],         1(%[p_data_in])                    \n\t"
58     "lwl         %[tmp2],         3(%[p_coefs])                      \n\t"
59     "lwl         %[tmp3],         -3(%[p_data_in])                   \n\t"
60     "lwl         %[tmp4],         7(%[p_coefs])                      \n\t"
61     "lwr         %[tmp1],         -2(%[p_data_in])                   \n\t"
62     "lwr         %[tmp2],         0(%[p_coefs])                      \n\t"
63     "lwr         %[tmp3],         -6(%[p_data_in])                   \n\t"
64     "lwr         %[tmp4],         4(%[p_coefs])                      \n\t"
65     "packrl.ph   %[tmp1],         %[tmp1],         %[tmp1]           \n\t"
66     "packrl.ph   %[tmp3],         %[tmp3],         %[tmp3]           \n\t"
67     "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
68     "dpa.w.ph    $ac0,            %[tmp3],         %[tmp4]           \n\t"
69     "addiu       %[j],            %[j],            -1                \n\t"
70     "addiu       %[p_data_in],    %[p_data_in],    -8                \n\t"
71     "bgtz        %[j],            2b                                 \n\t"
72     " addiu      %[p_coefs],      %[p_coefs],      8                 \n\t"
73    "3:                                                               \n\t"
74     "beq         %[k],            $zero,           5f                \n\t"
75     " nop                                                            \n\t"
76    "4:                                                               \n\t"
77     "lhu         %[tmp1],         0(%[p_data_in])                    \n\t"
78     "lhu         %[tmp2],         0(%[p_coefs])                      \n\t"
79     "addiu       %[p_data_in],    %[p_data_in],    -2                \n\t"
80     "addiu       %[k],            %[k],            -1                \n\t"
81     "dpa.w.ph    $ac0,            %[tmp1],         %[tmp2]           \n\t"
82     "bgtz        %[k],            4b                                 \n\t"
83     " addiu      %[p_coefs],      %[p_coefs],      2                 \n\t"
84    "5:                                                               \n\t"
85     "extr_r.w    %[out_s32],      $ac0,            12                \n\t"
86     "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
87     "subu        %[i],            %[i],            %[factor]         \n\t"
88     "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
89     "sra         %[out_s32],      %[out_s32],      16                \n\t"
90     "sh          %[out_s32],      0(%[data_out])                     \n\t"
91     "bgtz        %[i],            1b                                 \n\t"
92     " addiu      %[data_out],     %[data_out],     2                 \n\t"
93     ".set        pop                                                 \n\t"
94     : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
95       [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in),
96       [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
97       [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
98       [i] "=&r" (i), [k] "=&r" (k)
99     : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
100       [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
101       [delay] "r" (delay), [factor] "r" (factor)
102     : "memory", "hi", "lo"
103  );
104 #else  // #if defined(MIPS_DSP_R2_LE)
105   __asm __volatile (
106     ".set        push                                                \n\t"
107     ".set        noreorder                                           \n\t"
108     "sll         %[factor_2],     %[factor],       1                 \n\t"
109     "subu        %[i],            %[endpos],       %[delay]          \n\t"
110    "1:                                                               \n\t"
111     "move        %[p_data_in],    %[p_data_in_0]                     \n\t"
112     "addiu       %[out_s32],      $zero,           2048              \n\t"
113     "move        %[p_coefs],      %[p_coefs_0]                       \n\t"
114     "sra         %[j],            %[coef_length],  1                 \n\t"
115     "beq         %[j],            $zero,           3f                \n\t"
116     " andi       %[k],            %[coef_length],  1                 \n\t"
117    "2:                                                               \n\t"
118     "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
119     "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
120     "lh          %[tmp3],         -2(%[p_data_in])                   \n\t"
121     "lh          %[tmp4],         2(%[p_coefs])                      \n\t"
122     "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
123     "addiu       %[p_coefs],      %[p_coefs],      4                 \n\t"
124     "mul         %[tmp3],         %[tmp3],         %[tmp4]           \n\t"
125     "addiu       %[j],            %[j],            -1                \n\t"
126     "addiu       %[p_data_in],    %[p_data_in],    -4                \n\t"
127     "addu        %[tmp1],         %[tmp1],         %[tmp3]           \n\t"
128     "bgtz        %[j],            2b                                 \n\t"
129     " addu       %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
130    "3:                                                               \n\t"
131     "beq         %[k],            $zero,           4f                \n\t"
132     " nop                                                            \n\t"
133     "lh          %[tmp1],         0(%[p_data_in])                    \n\t"
134     "lh          %[tmp2],         0(%[p_coefs])                      \n\t"
135     "mul         %[tmp1],         %[tmp1],         %[tmp2]           \n\t"
136     "addu        %[out_s32],      %[out_s32],      %[tmp1]           \n\t"
137    "4:                                                               \n\t"
138     "sra         %[out_s32],      %[out_s32],      12                \n\t"
139     "addu        %[p_data_in_0],  %[p_data_in_0],  %[factor_2]       \n\t"
140 #if defined(MIPS_DSP_R1_LE)
141     "shll_s.w    %[out_s32],      %[out_s32],      16                \n\t"
142     "sra         %[out_s32],      %[out_s32],      16                \n\t"
143 #else  // #if defined(MIPS_DSP_R1_LE)
144     "slt         %[tmp1],         %[max_16],       %[out_s32]        \n\t"
145     "movn        %[out_s32],      %[max_16],       %[tmp1]           \n\t"
146     "slt         %[tmp1],         %[out_s32],      %[min_16]         \n\t"
147     "movn        %[out_s32],      %[min_16],       %[tmp1]           \n\t"
148 #endif  // #if defined(MIPS_DSP_R1_LE)
149     "subu        %[i],            %[i],            %[factor]         \n\t"
150     "sh          %[out_s32],      0(%[data_out])                     \n\t"
151     "bgtz        %[i],            1b                                 \n\t"
152     " addiu      %[data_out],     %[data_out],     2                 \n\t"
153     ".set        pop                                                 \n\t"
154     : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
155       [tmp4] "=&r" (tmp4), [p_data_in] "=&r" (p_data_in), [k] "=&r" (k),
156       [p_data_in_0] "+r" (p_data_in_0), [p_coefs] "=&r" (p_coefficients),
157       [j] "=&r" (j), [out_s32] "=&r" (out_s32), [factor_2] "=&r" (factor_2),
158       [i] "=&r" (i)
159     : [coef_length] "r" (coefficients_length), [data_out] "r" (data_out),
160       [p_coefs_0] "r" (p_coefficients_0), [endpos] "r" (endpos),
161 #if !defined(MIPS_DSP_R1_LE)
162       [max_16] "r" (max_16), [min_16] "r" (min_16),
163 #endif  // #if !defined(MIPS_DSP_R1_LE)
164       [delay] "r" (delay), [factor] "r" (factor)
165     : "memory", "hi", "lo"
166   );
167 #endif  // #if defined(MIPS_DSP_R2_LE)
168   return 0;
169 }
170