/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-sse-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4():
    127  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    147  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    170  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    171  vi5x3012 = vi5x7456;
    263  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    283  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    306  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    307  vi5x3012 = vi5x7456;
    409  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    416  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 5x5p2-minmax-sse-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2():
    127  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    147  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    170  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    171  vi5x3012 = vi5x7456;
    265  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    285  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    308  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    309  vi5x3012 = vi5x7456;
    413  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    420  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 5x5p2-minmax-sse-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3():
    127  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    147  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    170  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    171  vi5x3012 = vi5x7456;
    267  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    287  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    310  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    311  vi5x3012 = vi5x7456;
    417  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    424  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 5x5p2-minmax-sse-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2():
    141  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    164  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    193  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    194  vi5x3012 = vi5x7456;
    319  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    342  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    371  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    372  vi5x3012 = vi5x7456;
    508  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    516  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 5x5p2-minmax-sse-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4():
    141  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    164  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    193  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    194  vi5x3012 = vi5x7456;
    316  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    339  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    368  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    369  vi5x3012 = vi5x7456;
    502  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    510  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 3x3p1-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4():
    138  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    164  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    184  vi5x3012 = vi5x7456;
    274  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    300  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
|
D | 3x3p1-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4():
    151  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    182  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    207  vi5x3012 = vi5x7456;
    311  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    342  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
|
D | 5x5p2-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4():
    155  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    181  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    216  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    217  vi5x3012 = vi5x7456;
    369  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    395  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    430  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    431  vi5x3012 = vi5x7456;
    595  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    604  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 5x5p2-minmax-sse-4x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2():
    155  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    181  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    216  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    217  vi5x3012 = vi5x7456;
    373  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    399  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    434  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    435  vi5x3012 = vi5x7456;
    603  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    612  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
D | 3x3p1-minmax-sse-6x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4():
    164  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    200  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    230  vi5x3012 = vi5x7456;
    348  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    384  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
|
D | 5x5p2-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4():
    169  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    198  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    239  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    240  vi5x3012 = vi5x7456;
    422  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    451  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    492  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
    493  vi5x3012 = vi5x7456;
    688  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    698  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
    [all …]
|
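Every match above is the same row-shifting idiom used by these generated SSE depthwise-convolution kernels: the current 4-column block vi5x4567 is rotated right by one lane into vi5x7456, the carried lane from the previous block (vi5x3012) is spliced in with _mm_move_ss to form vi5x3456, and, in the 5x5 kernels, a second shuffle combines vi5x3012 with vi5x7456 to form vi5x2345 before vi5x7456 becomes the carry for the next block. The sketch below is not taken from XNNPACK; the input values and the print_m128 helper are assumptions added only so the lane names can be checked by printing.

#include <stdio.h>
#include <xmmintrin.h>

/* Hypothetical helper (not part of XNNPACK): print the four lanes of an __m128. */
static void print_m128(const char* name, __m128 v) {
  float f[4];
  _mm_storeu_ps(f, v);
  printf("%s = [%g, %g, %g, %g]\n", name, f[0], f[1], f[2], f[3]);
}

int main(void) {
  /* Input row 5, with each element equal to its column index so lane names stay visible. */
  const float row5[8] = {0, 1, 2, 3, 4, 5, 6, 7};

  __m128 vi5x3012 = _mm_setr_ps(3, 0, 1, 2);       /* carry from the previous block: lanes [3, 0, 1, 2] */
  const __m128 vi5x4567 = _mm_loadu_ps(row5 + 4);  /* current block: columns [4, 5, 6, 7] */

  /* Rotate the current block right by one lane: [7, 4, 5, 6]. */
  const __m128 vi5x7456 = _mm_shuffle_ps(vi5x4567, vi5x4567, _MM_SHUFFLE(2, 1, 0, 3));
  /* Replace lane 0 with column 3 from the carry: [3, 4, 5, 6] (window shifted left by one). */
  const __m128 vi5x3456 = _mm_move_ss(vi5x7456, vi5x3012);
  /* Combine carry and rotated block: [2, 3, 4, 5] (window shifted left by two; 5x5 kernels only). */
  const __m128 vi5x2345 = _mm_shuffle_ps(vi5x3012, vi5x7456, _MM_SHUFFLE(2, 1, 0, 3));
  /* The rotated block becomes the carry for the next 4-column block. */
  vi5x3012 = vi5x7456;

  print_m128("vi5x7456", vi5x7456);
  print_m128("vi5x3456", vi5x3456);
  print_m128("vi5x2345", vi5x2345);
  return 0;
}

Compiled with any SSE-capable x86 compiler, the three printed vectors should read [7, 4, 5, 6], [3, 4, 5, 6], and [2, 3, 4, 5], matching the suffixes of the variable names in the listings above.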