/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-sse-2x4.c | 94 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local 147 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 170 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 171 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 283 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 306 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 307 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 416 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 434 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
|
D | 5x5p2-minmax-sse-2x4-acc2.c | 94 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local 147 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 170 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 171 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 285 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 308 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 309 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 420 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 438 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
|
D | 5x5p2-minmax-sse-2x4-acc3.c | 94 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local 147 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 170 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 171 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 287 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 310 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 311 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 424 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 442 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
|
D | 5x5p2-minmax-sse-3x4-acc2.c | 100 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 164 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 193 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 194 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 342 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 371 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 372 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 516 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 540 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
|
D | 5x5p2-minmax-sse-3x4.c | 100 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 164 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 193 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 194 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 339 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 368 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 369 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 510 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 534 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
|
D | 3x3p1-minmax-sse-4x4.c | 91 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local 164 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() 184 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() 300 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
|
D | 3x3p1-minmax-sse-5x4.c | 97 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 182 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 207 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 342 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
|
D | 5x5p2-minmax-sse-4x4.c | 106 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 181 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 216 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 217 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 395 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 430 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 431 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 604 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 634 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
|
D | 5x5p2-minmax-sse-4x4-acc2.c | 106 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 181 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 216 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 217 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 399 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 434 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 435 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 612 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 642 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
|
D | 3x3p1-minmax-sse-6x4.c | 103 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 200 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 230 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 384 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
|
D | 5x5p2-minmax-sse-5x4.c | 112 __m128 vi5x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 198 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 239 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 240 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 451 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 492 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 493 vi5x3012 = vi5x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 698 const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 734 const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
|