/external/XNNPACK/src/f32-dwconv2d-chw/gen/
3x3p1-minmax-sse-4x4.c
  197  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
  210  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
  327  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
  340  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()

5x5p2-minmax-sse-2x4.c
  183  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
  202  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
  319  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
  338  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
  442  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local
  460  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()

3x3p1-minmax-sse-5x4.c
  221  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
  236  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
  374  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
  389  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()

5x5p2-minmax-sse-2x4-acc2.c
  183  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
  202  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
  321  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
  340  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
  446  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local
  464  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()

5x5p2-minmax-sse-2x4-acc3.c
  183  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
  202  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
  323  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
  342  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
  450  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local
  468  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()

3x3p1-minmax-sse-6x4.c
  245  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
  262  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
  421  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
  438  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()

5x5p2-minmax-sse-3x4-acc2.c
  208  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
  234  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
  386  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
  412  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
  549  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local
  573  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()

5x5p2-minmax-sse-3x4.c
  208  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
  234  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
  383  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
  409  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
  543  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local
  567  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()

5x5p2-minmax-sse-4x4.c
  233  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
  266  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
  447  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
  480  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
  644  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
  674  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()

5x5p2-minmax-sse-4x4-acc2.c
  233  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
  266  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
  451  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
  484  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
  652  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
  682  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()

5x5p2-minmax-sse-5x4.c
  258  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
  298  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
  511  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
  551  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
  745  const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vzero);  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
  781  const __m128 vi5x5678 = _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));  in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()

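Every hit above repeats the same two-instruction window shift: each kernel splices the low lane of vi5x89AB (or of vzero, in what is evidently the right-edge remainder path) into vi5x4567 with _mm_move_ss, then rotates the lanes with _mm_shuffle_ps to obtain the left-shifted window vi5x5678. The sketch below is not taken from XNNPACK; the helper name, the demo main(), and the concrete values are made up purely to illustrate what the pair of intrinsics computes using baseline SSE only.

  #include <stdio.h>
  #include <xmmintrin.h>

  /* Hypothetical helper mirroring the vi5x8567/vi5x5678 idiom from the listing:
     given lanes {4, 5, 6, 7} and {8, 9, 10, 11}, produce the shifted window {5, 6, 7, 8}. */
  static __m128 shift_window_left1(__m128 vi5x4567, __m128 vi5x89AB) {
    /* Lane 0 of the second operand replaces lane 0 of the first: lanes become {8, 5, 6, 7}. */
    const __m128 vi5x8567 = _mm_move_ss(vi5x4567, vi5x89AB);
    /* Rotate the lanes left by one: {8, 5, 6, 7} -> {5, 6, 7, 8}. */
    return _mm_shuffle_ps(vi5x8567, vi5x8567, _MM_SHUFFLE(0, 3, 2, 1));
  }

  int main(void) {
    const __m128 vi5x4567 = _mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);
    const __m128 vi5x89AB = _mm_setr_ps(8.0f, 9.0f, 10.0f, 11.0f);
    float out[4];
    _mm_storeu_ps(out, shift_window_left1(vi5x4567, vi5x89AB));
    printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* prints: 5 6 7 8 */
    return 0;
  }

In the remainder hits (the _mm_move_ss(vi5x4567, vzero) lines), the same rotation yields {5, 6, 7, 0}, i.e. the window zero-padded past the last column. The two-instruction form is presumably used because baseline SSE, which these generated kernels target, has no single cross-lane shift for packed floats.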