/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-sse-4x4.c | 157 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 183 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 220 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 221 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 371 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 397 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 434 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 435 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 597 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 606 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() [all …]
|
D | 5x5p2-minmax-sse-4x4-acc2.c | 157 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 183 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 220 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 221 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 375 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 401 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 438 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 439 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 605 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 614 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() [all …]
|
D | 3x3p1-minmax-sse-6x4.c | 168 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 204 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 232 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 352 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 388 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
|
D | 5x5p2-minmax-sse-5x4.c | 171 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 200 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 243 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 244 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 424 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 453 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 496 const __m128 vi7x2345 = _mm_shuffle_ps(vi7x3012, vi7x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 497 vi7x3012 = vi7x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 690 const __m128 vi7x7456 = _mm_shuffle_ps(vi7x4567, vi7x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 700 const __m128 vi7x3456 = _mm_move_ss(vi7x7456, vi7x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() [all …]
|