/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-sse-3x4-acc2.c | 142 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 165 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 195 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 196 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 320 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 343 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 373 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 374 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 509 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 517 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() [all …]
|
D | 5x5p2-minmax-sse-3x4.c | 142 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 165 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 195 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 196 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 317 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 340 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 370 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 371 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 503 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 511 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() [all …]
|
D | 3x3p1-minmax-sse-5x4.c | 153 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 184 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 208 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 313 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 344 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
|
D | 5x5p2-minmax-sse-4x4.c | 156 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 182 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 218 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 219 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 370 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 396 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 432 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 433 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 596 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 605 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() [all …]
|
D | 5x5p2-minmax-sse-4x4-acc2.c | 156 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 182 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 218 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 219 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 374 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 400 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 436 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 437 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 604 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 613 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() [all …]
|
D | 3x3p1-minmax-sse-6x4.c | 166 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 202 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 231 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 350 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 386 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
|
D | 5x5p2-minmax-sse-5x4.c | 170 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 199 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 241 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 242 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 423 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 452 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 494 const __m128 vi6x2345 = _mm_shuffle_ps(vi6x3012, vi6x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 495 vi6x3012 = vi6x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 689 const __m128 vi6x7456 = _mm_shuffle_ps(vi6x4567, vi6x4567, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 699 const __m128 vi6x3456 = _mm_move_ss(vi6x7456, vi6x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() [all …]
|