/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D | 5x5p2-minmax-sse-1x4-acc4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4()
     86  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    128  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    143  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    144  vi3x3012 = vi3x7456;
    228  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    243  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    244  vi3x3012 = vi3x7456;
    326  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    338  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-1x4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4()
     86  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    128  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    143  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    144  vi3x3012 = vi3x7456;
    225  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    240  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    241  vi3x3012 = vi3x7456;
    320  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    332  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-1x4-acc2.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2()
     86  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    128  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    143  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    144  vi3x3012 = vi3x7456;
    226  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    241  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    242  vi3x3012 = vi3x7456;
    322  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    334  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-1x4-acc3.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3()
     86  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    128  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    143  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    144  vi3x3012 = vi3x7456;
    227  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    242  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    243  vi3x3012 = vi3x7456;
    324  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    336  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-1x4-acc5.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5()
     86  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    128  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    143  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    144  vi3x3012 = vi3x7456;
    229  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    244  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    245  vi3x3012 = vi3x7456;
    328  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    340  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 3x3p1-minmax-sse-2x4.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4()
     75  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    124  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    136  vi3x3012 = vi3x7456;
    212  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 3x3p1-minmax-sse-2x4-acc2.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2()
     75  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    124  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    136  vi3x3012 = vi3x7456;
    214  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 5x5p2-minmax-sse-2x4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4()
     92  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    145  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    166  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    167  vi3x3012 = vi3x7456;
    281  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    302  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    303  vi3x3012 = vi3x7456;
    414  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    432  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-2x4-acc2.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2()
     92  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    145  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    166  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    167  vi3x3012 = vi3x7456;
    283  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    304  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    305  vi3x3012 = vi3x7456;
    418  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    436  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-2x4-acc3.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3()
     92  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    145  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    166  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    167  vi3x3012 = vi3x7456;
    285  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    306  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    307  vi3x3012 = vi3x7456;
    422  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    440  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 3x3p1-minmax-sse-3x4.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4()
     81  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    142  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    159  vi3x3012 = vi3x7456;
    254  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 5x5p2-minmax-sse-3x4-acc2.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2()
     98  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    162  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    189  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    190  vi3x3012 = vi3x7456;
    340  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    367  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    368  vi3x3012 = vi3x7456;
    514  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    538  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-3x4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4()
     98  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    162  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    189  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    190  vi3x3012 = vi3x7456;
    337  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    364  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    365  vi3x3012 = vi3x7456;
    508  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    532  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 3x3p1-minmax-sse-4x4.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
     87  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    160  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    182  vi3x3012 = vi3x7456;
    296  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 3x3p1-minmax-sse-5x4.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
     93  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    178  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    205  vi3x3012 = vi3x7456;
    338  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 5x5p2-minmax-sse-4x4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
    104  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    179  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    212  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    213  vi3x3012 = vi3x7456;
    393  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    426  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    427  vi3x3012 = vi3x7456;
    602  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    632  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 5x5p2-minmax-sse-4x4-acc2.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
    104  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    179  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    212  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    213  vi3x3012 = vi3x7456;
    397  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    430  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    431  vi3x3012 = vi3x7456;
    610  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    640  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

D | 3x3p1-minmax-sse-6x4.c | all in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4()
     99  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    196  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    228  vi3x3012 = vi3x7456;
    380  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

D | 5x5p2-minmax-sse-5x4.c | all in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
    110  __m128 vi3x3012 = _mm_setzero_ps();  (local)
    196  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    235  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    236  vi3x3012 = vi3x7456;
    449  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    488  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));
    489  vi3x3012 = vi3x7456;
    696  const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);
    732  const __m128 vi3x2345 = _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

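Every match above is one instance of the same two-register sliding-window idiom shared by these generated SSE kernels: vi3x3012 carries the previous 4-column chunk of input row 3 in rotated lane order [x3, x0, x1, x2], and each iteration combines it with the rotated current chunk vi3x7456 = [x7, x4, x5, x6] to form the shifted windows vi3x3456 and vi3x2345 without reloading from memory. The zero initialization at the declaration site supplies the implicit left padding (the p1/p2 in the file names). The sketch below is a minimal standalone reconstruction of that idiom, not code from the generated kernels; the 8-element demo row, main(), and the print_ps() helper are illustrative additions.

    #include <stdio.h>
    #include <xmmintrin.h>

    /* Illustrative helper: dump the four lanes of an __m128. */
    static void print_ps(const char* name, __m128 v) {
      float f[4];
      _mm_storeu_ps(f, v);
      printf("%s = [%g, %g, %g, %g]\n", name, f[0], f[1], f[2], f[3]);
    }

    int main(void) {
      /* One input row; the kernels stream it 4 pixels at a time. */
      const float row[8] = {0.f, 1.f, 2.f, 3.f, 4.f, 5.f, 6.f, 7.f};

      /* Starts at zero: stands in for the zero-padded columns to the
         left of column 0 before the first chunk is processed. */
      __m128 vi3x3012 = _mm_setzero_ps();

      for (int c = 0; c < 8; c += 4) {
        /* Current chunk, lanes [x4, x5, x6, x7] in kernel naming. */
        const __m128 vi3x4567 = _mm_loadu_ps(row + c);

        /* Rotate right by one lane: [x7, x4, x5, x6]. */
        const __m128 vi3x7456 =
            _mm_shuffle_ps(vi3x4567, vi3x4567, _MM_SHUFFLE(2, 1, 0, 3));

        /* Window shifted left by 1 column: lane 0 (x7) is replaced by
           x3, kept in lane 0 of vi3x3012 from the previous chunk. */
        const __m128 vi3x3456 = _mm_move_ss(vi3x7456, vi3x3012);

        /* Window shifted left by 2 columns: [x2, x3] come from the
           previous rotated chunk, [x4, x5] from the current one. */
        const __m128 vi3x2345 =
            _mm_shuffle_ps(vi3x3012, vi3x7456, _MM_SHUFFLE(2, 1, 0, 3));

        print_ps("vi3x2345", vi3x2345);
        print_ps("vi3x3456", vi3x3456);
        print_ps("vi3x4567", vi3x4567);

        /* Carry the rotated chunk forward: relative to the next
           chunk's columns, [x7, x4, x5, x6] is exactly "x3012". */
        vi3x3012 = vi3x7456;
      }
      return 0;
    }

Note the split visible in the listing: the 3x3p1 kernels need only vi3x3456 (a one-column reach past the chunk boundary for a 3-tap filter with padding 1), so they skip the vi3x2345 shuffle, while the 5x5p2 kernels, whose 5-tap filter with padding 2 reaches two columns to the left, compute both shifted windows from the same pair of registers.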