/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-sse-5x4.c | 223 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 238 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 376 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 391 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
|
D | 3x3p1-minmax-sse-6x4.c | 247 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 264 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 423 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 440 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
|
D | 5x5p2-minmax-sse-3x4-acc2.c | 210 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 235 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 388 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 413 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 550 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 574 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
|
D | 5x5p2-minmax-sse-3x4.c | 210 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 235 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 385 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 410 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 544 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 568 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
|
D | 5x5p2-minmax-sse-4x4.c | 235 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 267 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 449 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 481 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 645 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 675 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
|
D | 5x5p2-minmax-sse-4x4-acc2.c | 235 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 267 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 453 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 485 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 653 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 683 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
|
D | 5x5p2-minmax-sse-5x4.c | 260 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 299 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 513 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vi6x89AB); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 552 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 746 const __m128 vi6x8567 = _mm_move_ss(vi6x4567, vzero); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 782 const __m128 vi6x5678 = _mm_shuffle_ps(vi6x8567, vi6x8567, _MM_SHUFFLE(0, 3, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
|