Lines Matching refs:summ
79 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
80 summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
81 …d_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
82 …d_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
83 …d_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
84 …d_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
85 …d_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
86 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
87 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
88 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
89 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
90 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
91 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
92 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
93 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
111 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
112 summ = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
113 …d_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
114 …d_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
115 …d_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
116 …d_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
117 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
118 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
119 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
120 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
121 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
122 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
123 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
124 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
143 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
144 summ = _mm256_madd_epi16(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
145 …d_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
146 …d_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
147 …d_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
148 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
149 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
150 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
151 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
152 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
153 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
154 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
155 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
171 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
172 summ = _mm256_madd_epi16(q8, _mm256_loadu_si256((const __m256i*)(data+i-9 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
173 …d_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
174 …d_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
175 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
176 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
177 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
178 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
179 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
180 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
181 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
182 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
201 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
202 summ = _mm256_madd_epi16(q7, _mm256_loadu_si256((const __m256i*)(data+i-8 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
203 …d_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
204 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
205 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
206 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
207 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
208 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
209 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
210 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
211 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
225 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
226 summ = _mm256_madd_epi16(q6, _mm256_loadu_si256((const __m256i*)(data+i-7 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
227 …d_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
228 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
229 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
230 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
231 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
232 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
233 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
234 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
249 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
250 summ = _mm256_madd_epi16(q5, _mm256_loadu_si256((const __m256i*)(data+i-6 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
251 …d_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
252 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
253 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
254 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
255 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
256 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
257 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
269 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
270 summ = _mm256_madd_epi16(q4, _mm256_loadu_si256((const __m256i*)(data+i-5 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
271 …d_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
272 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
273 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
274 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
275 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
276 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
291 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
292 summ = _mm256_madd_epi16(q3, _mm256_loadu_si256((const __m256i*)(data+i-4 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
293 …d_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
294 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
295 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
296 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
297 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
307 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
308 summ = _mm256_madd_epi16(q2, _mm256_loadu_si256((const __m256i*)(data+i-3 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
309 …d_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
310 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
311 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
312 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
323 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
324 summ = _mm256_madd_epi16(q1, _mm256_loadu_si256((const __m256i*)(data+i-2 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
325 …d_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
326 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
327 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
335 __m256i summ; in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2() local
336 summ = _mm256_madd_epi16(q0, _mm256_loadu_si256((const __m256i*)(data+i-1 ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
337 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
338 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2()
434 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
435 summ = _mm256_mullo_epi32(q11, _mm256_loadu_si256((const __m256i*)(data+i-12))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
436 …o_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
437 …o_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
438 …o_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
439 …o_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
440 …o_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
441 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
442 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
443 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
444 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
445 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
446 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
447 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
448 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
466 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
467 summ = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
468 …o_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
469 …o_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
470 …o_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
471 …o_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
472 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
473 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
474 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
475 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
476 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
477 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
478 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
479 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
498 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
499 summ = _mm256_mullo_epi32(q9, _mm256_loadu_si256((const __m256i*)(data+i-10))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
500 …o_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
501 …o_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
502 …o_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
503 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
504 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
505 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
506 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
507 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
508 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
509 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
510 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
526 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
527 summ = _mm256_mullo_epi32(q8, _mm256_loadu_si256((const __m256i*)(data+i-9))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
528 …o_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
529 …o_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
530 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
531 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
532 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
533 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
534 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
535 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
536 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
537 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
556 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
557 summ = _mm256_mullo_epi32(q7, _mm256_loadu_si256((const __m256i*)(data+i-8))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
558 …o_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
559 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
560 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
561 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
562 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
563 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
564 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
565 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
566 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
580 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
581 summ = _mm256_mullo_epi32(q6, _mm256_loadu_si256((const __m256i*)(data+i-7))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
582 …o_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
583 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
584 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
585 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
586 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
587 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
588 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
589 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
604 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
605 summ = _mm256_mullo_epi32(q5, _mm256_loadu_si256((const __m256i*)(data+i-6))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
606 …o_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
607 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
608 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
609 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
610 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
611 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
612 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
624 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
625 summ = _mm256_mullo_epi32(q4, _mm256_loadu_si256((const __m256i*)(data+i-5))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
626 …o_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
627 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
628 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
629 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
630 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
631 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
646 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
647 summ = _mm256_mullo_epi32(q3, _mm256_loadu_si256((const __m256i*)(data+i-4))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
648 …o_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
649 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
650 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
651 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
652 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
662 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
663 summ = _mm256_mullo_epi32(q2, _mm256_loadu_si256((const __m256i*)(data+i-3))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
664 …o_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
665 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
666 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
667 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
678 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
679 summ = _mm256_mullo_epi32(q1, _mm256_loadu_si256((const __m256i*)(data+i-2))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
680 …o_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); summ = _mm256_add_epi32(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
681 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
682 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
690 __m256i summ; in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2() local
691 summ = _mm256_mullo_epi32(q0, _mm256_loadu_si256((const __m256i*)(data+i-1))); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
692 summ = _mm256_sra_epi32(summ, cnt); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
693 …i256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ)); in FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2()
793 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
794 … summ = _mm256_mul_epi32(q11, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-12)))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
795 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11)))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
796 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
797 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
798 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
799 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
800 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
801 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
802 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
803 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
804 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
805 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
806 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
807 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
825 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
826 … summ = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11)))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
827 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
828 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
829 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
830 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
831 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
832 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
833 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
834 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
835 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
836 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
837 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
838 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
857 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
858 … summ = _mm256_mul_epi32(q9, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
859 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
860 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
861 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
862 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
863 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
864 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
865 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
866 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
867 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
868 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
869 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
885 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
886 … summ = _mm256_mul_epi32(q8, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
887 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
888 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
889 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
890 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
891 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
892 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
893 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
894 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
895 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
896 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
915 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
916 … summ = _mm256_mul_epi32(q7, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
917 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
918 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
919 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
920 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
921 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
922 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
923 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
924 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
925 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
939 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
940 … summ = _mm256_mul_epi32(q6, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
941 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
942 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
943 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
944 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
945 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
946 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
947 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
948 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
963 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
964 … summ = _mm256_mul_epi32(q5, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
965 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
966 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
967 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
968 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
969 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
970 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
971 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
983 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
984 … summ = _mm256_mul_epi32(q4, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
985 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
986 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
987 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
988 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
989 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
990 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1005 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
1006 … summ = _mm256_mul_epi32(q3, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1007 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1008 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1009 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1010 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1011 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1021 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
1022 … summ = _mm256_mul_epi32(q2, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1023 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1024 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1025 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1026 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1037 __m256i summ, mull; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
1038 … summ = _mm256_mul_epi32(q1, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1039 …cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1040 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1041 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1049 __m256i summ; in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2() local
1050 … summ = _mm256_mul_epi32(q0, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1051 summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()
1052 …sidual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ))); in FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2()