Home
last modified time | relevance | path

Searched refs:vi2x7456 (Results 1 – 23 of 23) sorted by relevance

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-sse-1x4-acc4.c110 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() local
127 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
141 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
142 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
210 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() local
227 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
241 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
242 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
319 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() local
325 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
[all …]
D5x5p2-minmax-sse-1x4.c110 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() local
127 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
141 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
142 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
207 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() local
224 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
238 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
239 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
313 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() local
319 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
[all …]
D5x5p2-minmax-sse-1x4-acc2.c110 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() local
127 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
141 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
142 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
208 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() local
225 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
239 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
240 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
315 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() local
321 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
[all …]
D5x5p2-minmax-sse-1x4-acc3.c110 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() local
127 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
141 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
142 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
209 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() local
226 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
240 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
241 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
317 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() local
323 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
[all …]
D5x5p2-minmax-sse-1x4-acc5.c110 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local
127 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
141 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
142 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
211 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local
228 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
242 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
243 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
321 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local
327 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
[all …]
D3x3p1-minmax-sse-1x4.c93 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4() local
104 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4()
112 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4()
157 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4() local
168 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4()
D3x3p1-minmax-sse-1x4-acc3.c93 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3() local
104 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3()
112 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3()
159 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3() local
170 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3()
D3x3p1-minmax-sse-1x4-acc2.c93 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2() local
104 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2()
112 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2()
158 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2() local
169 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2()
D3x3p1-minmax-sse-1x4-acc4.c93 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4() local
104 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4()
112 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4()
160 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4() local
171 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4()
D3x3p1-minmax-sse-2x4.c106 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local
122 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
135 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
194 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local
210 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
D3x3p1-minmax-sse-2x4-acc2.c106 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local
122 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
135 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
196 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local
212 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
D5x5p2-minmax-sse-2x4.c124 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
144 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
164 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
165 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
260 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
280 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
300 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
301 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
406 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
413 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
[all …]
D5x5p2-minmax-sse-2x4-acc2.c124 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
144 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
164 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
165 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
262 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
282 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
302 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
303 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
410 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
417 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
[all …]
D5x5p2-minmax-sse-2x4-acc3.c124 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
144 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
164 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
165 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
264 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
284 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
304 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
305 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
414 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
421 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
[all …]
D3x3p1-minmax-sse-3x4.c119 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
140 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
158 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
231 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local
252 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
D5x5p2-minmax-sse-3x4-acc2.c138 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
161 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
187 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
188 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
316 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
339 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
365 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
366 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
505 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
513 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
[all …]
D5x5p2-minmax-sse-3x4.c138 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
161 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
187 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
188 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
313 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
336 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
362 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
363 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
499 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
507 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
[all …]
D3x3p1-minmax-sse-4x4.c132 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
158 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
181 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
268 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
294 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
D3x3p1-minmax-sse-5x4.c145 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
176 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
204 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
305 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
336 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
D5x5p2-minmax-sse-4x4.c152 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
178 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
210 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
211 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
366 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
392 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
424 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
425 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
592 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
601 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
[all …]
D5x5p2-minmax-sse-4x4-acc2.c152 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
178 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
210 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
211 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
370 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
396 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
428 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
429 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
600 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
609 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
[all …]
D3x3p1-minmax-sse-6x4.c158 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
194 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
227 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
342 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
378 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
D5x5p2-minmax-sse-5x4.c166 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
195 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
233 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
234 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
419 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
448 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
486 const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
487 vi2x3012 = vi2x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
685 const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
695 const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
[all …]