Home
last modified time | relevance | path

Searched refs:vi3x3012 (Results 1 – 19 of 19) sorted by relevance

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-sse-1x4-acc4.c86 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() local
128 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
143 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
144 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
228 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
243 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
244 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
326 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
338 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
D5x5p2-minmax-sse-1x4.c86 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() local
128 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
143 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
144 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
225 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
240 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
241 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
320 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
332 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
D5x5p2-minmax-sse-1x4-acc2.c86 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() local
128 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
143 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
144 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
226 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
241 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
242 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
322 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
334 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
D5x5p2-minmax-sse-1x4-acc3.c86 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() local
128 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
143 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
144 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
227 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
242 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
243 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
324 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
336 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
D5x5p2-minmax-sse-1x4-acc5.c86 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local
128 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
143 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
144 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
229 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
244 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
245 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
328 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
340 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
D3x3p1-minmax-sse-2x4.c75 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local
124 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
136 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
212 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
D3x3p1-minmax-sse-2x4-acc2.c75 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local
124 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
136 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
214 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
D5x5p2-minmax-sse-2x4.c92 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
145 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
166 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
167 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
281 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
302 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
303 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
414 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
432 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
D5x5p2-minmax-sse-2x4-acc2.c92 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
145 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
166 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
167 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
283 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
304 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
305 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
418 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
436 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
D5x5p2-minmax-sse-2x4-acc3.c92 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
145 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
166 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
167 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
285 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
306 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
307 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
422 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
440 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
D3x3p1-minmax-sse-3x4.c81 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
142 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
159 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
254 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
D5x5p2-minmax-sse-3x4-acc2.c98 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
162 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
189 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
190 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
340 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
367 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
368 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
514 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
538 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
D5x5p2-minmax-sse-3x4.c98 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
162 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
189 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
190 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
337 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
364 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
365 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
508 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
532 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
D3x3p1-minmax-sse-4x4.c87 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
160 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
182 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
296 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
D3x3p1-minmax-sse-5x4.c93 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
178 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
205 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
338 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
D5x5p2-minmax-sse-4x4.c104 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
179 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
212 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
213 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
393 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
426 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
427 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
602 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
632 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
D5x5p2-minmax-sse-4x4-acc2.c104 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
179 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
212 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
213 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
397 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
430 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
431 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
610 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
640 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
D3x3p1-minmax-sse-6x4.c99 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
196 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
228 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
380 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
D5x5p2-minmax-sse-5x4.c110 __m128 vi3x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
196 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
235 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
236 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
449 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
488 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
489 vi3x3012 = vi3x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
696 const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
732 const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()