/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-sse-1x4-acc4.c | 84 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() local 126 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 139 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 140 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 226 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 239 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 240 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 324 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4() 336 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
|
D | 5x5p2-minmax-sse-1x4.c | 84 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() local 126 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 139 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 140 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 223 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 236 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 237 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 318 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4() 330 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
|
D | 5x5p2-minmax-sse-1x4-acc2.c | 84 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() local 126 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 139 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 140 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 224 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 237 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 238 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 320 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2() 332 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
|
D | 5x5p2-minmax-sse-1x4-acc3.c | 84 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() local 126 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 139 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 140 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 225 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 238 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 239 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 322 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3() 334 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
|
D | 5x5p2-minmax-sse-1x4-acc5.c | 84 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() local 126 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 139 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 140 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 227 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 240 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 241 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 326 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5() 338 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
|
D | 3x3p1-minmax-sse-1x4.c | 65 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4() local 102 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4() 111 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4() 166 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4()
|
D | 3x3p1-minmax-sse-1x4-acc3.c | 65 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3() local 102 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3() 111 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3() 168 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3()
|
D | 3x3p1-minmax-sse-1x4-acc2.c | 65 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2() local 102 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2() 111 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2() 167 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2()
|
D | 3x3p1-minmax-sse-1x4-acc4.c | 65 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4() local 102 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4() 111 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4() 169 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4()
|
D | 3x3p1-minmax-sse-2x4.c | 71 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() local 120 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() 134 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4() 208 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
|
D | 3x3p1-minmax-sse-2x4-acc2.c | 71 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() local 120 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() 134 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2() 210 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
|
D | 5x5p2-minmax-sse-2x4.c | 90 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() local 143 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 162 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 163 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 279 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 298 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 299 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 412 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4() 430 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
|
D | 5x5p2-minmax-sse-2x4-acc2.c | 90 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() local 143 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 162 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 163 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 281 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 300 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 301 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 416 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2() 434 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
|
D | 5x5p2-minmax-sse-2x4-acc3.c | 90 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() local 143 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 162 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 163 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 283 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 302 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 303 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 420 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3() 438 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
|
D | 3x3p1-minmax-sse-3x4.c | 77 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() local 138 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() 157 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4() 250 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
|
D | 5x5p2-minmax-sse-3x4-acc2.c | 96 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() local 160 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 185 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 186 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 338 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 363 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 364 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 512 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2() 536 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
|
D | 5x5p2-minmax-sse-3x4.c | 96 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() local 160 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 185 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 186 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 335 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 360 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 361 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 506 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4() 530 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
|
D | 3x3p1-minmax-sse-4x4.c | 83 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local 156 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() 180 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() 292 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
|
D | 3x3p1-minmax-sse-5x4.c | 89 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 174 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 203 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() 334 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
|
D | 5x5p2-minmax-sse-4x4.c | 102 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local 177 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 208 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 209 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 391 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 422 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 423 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 600 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() 630 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
|
D | 5x5p2-minmax-sse-4x4-acc2.c | 102 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local 177 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 208 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 209 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 395 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 426 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 427 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 608 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() 638 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
|
D | 3x3p1-minmax-sse-6x4.c | 95 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 192 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 226 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() 376 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
|
D | 5x5p2-minmax-sse-5x4.c | 108 __m128 vi1x3012 = _mm_setzero_ps(); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 194 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 231 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 232 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 447 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 484 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 485 vi1x3012 = vi1x7456; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 694 const __m128 vi1x3456 = _mm_move_ss(vi1x7456, vi1x3012); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 730 const __m128 vi1x2345 = _mm_shuffle_ps(vi1x3012, vi1x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
|