/external/XNNPACK/src/f32-dwconv2d-chw/gen/

D | 5x5p2-minmax-sse-1x4-acc4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4():
    110  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    127  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    141  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    142  vi2x3012 = vi2x7456;
    210  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    227  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    241  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    242  vi2x3012 = vi2x7456;
    319  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    325  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4():
    110  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    127  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    141  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    142  vi2x3012 = vi2x7456;
    207  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    224  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    238  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    239  vi2x3012 = vi2x7456;
    313  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    319  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2():
    110  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    127  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    141  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    142  vi2x3012 = vi2x7456;
    208  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    225  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    239  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    240  vi2x3012 = vi2x7456;
    315  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    321  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3():
    110  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    127  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    141  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    142  vi2x3012 = vi2x7456;
    209  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    226  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    240  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    241  vi2x3012 = vi2x7456;
    317  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    323  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-1x4-acc5.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5():
    110  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    127  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    141  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    142  vi2x3012 = vi2x7456;
    211  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    228  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    242  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    243  vi2x3012 = vi2x7456;
    321  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    327  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 3x3p1-minmax-sse-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4():
    93   const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    104  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    112  vi2x3012 = vi2x7456;
    157  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    168  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3():
    93   const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    104  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    112  vi2x3012 = vi2x7456;
    159  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    170  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2():
    93   const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    104  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    112  vi2x3012 = vi2x7456;
    158  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    169  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-1x4-acc4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4():
    93   const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    104  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    112  vi2x3012 = vi2x7456;
    160  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    171  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4():
    106  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    122  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    135  vi2x3012 = vi2x7456;
    194  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    210  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2():
    106  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    122  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    135  vi2x3012 = vi2x7456;
    196  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    212  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 5x5p2-minmax-sse-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4():
    124  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    144  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    164  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    165  vi2x3012 = vi2x7456;
    260  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    280  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    300  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    301  vi2x3012 = vi2x7456;
    406  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    413  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2():
    124  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    144  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    164  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    165  vi2x3012 = vi2x7456;
    262  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    282  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    302  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    303  vi2x3012 = vi2x7456;
    410  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    417  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3():
    124  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    144  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    164  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    165  vi2x3012 = vi2x7456;
    264  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    284  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    304  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    305  vi2x3012 = vi2x7456;
    414  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    421  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 3x3p1-minmax-sse-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4():
    119  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    140  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    158  vi2x3012 = vi2x7456;
    231  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    252  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 5x5p2-minmax-sse-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2():
    138  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    161  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    187  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    188  vi2x3012 = vi2x7456;
    316  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    339  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    365  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    366  vi2x3012 = vi2x7456;
    505  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    513  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4():
    138  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    161  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    187  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    188  vi2x3012 = vi2x7456;
    313  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    336  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    362  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    363  vi2x3012 = vi2x7456;
    499  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    507  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 3x3p1-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4():
    132  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    158  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    181  vi2x3012 = vi2x7456;
    268  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    294  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 3x3p1-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4():
    145  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    176  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    204  vi2x3012 = vi2x7456;
    305  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    336  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 5x5p2-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4():
    152  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    178  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    210  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    211  vi2x3012 = vi2x7456;
    366  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    392  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    424  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    425  vi2x3012 = vi2x7456;
    592  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    601  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 5x5p2-minmax-sse-4x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2():
    152  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    178  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    210  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    211  vi2x3012 = vi2x7456;
    370  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    396  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    428  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    429  vi2x3012 = vi2x7456;
    600  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    609  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

D | 3x3p1-minmax-sse-6x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4():
    158  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    194  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    227  vi2x3012 = vi2x7456;
    342  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    378  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);

D | 5x5p2-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4():
    166  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    195  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    233  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    234  vi2x3012 = vi2x7456;
    419  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    448  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    486  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
    487  vi2x3012 = vi2x7456;
    685  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));  (local)
    695  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
    [all …]

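Every hit above is an instance of the same lane-rotation idiom: the generated SSE kernels build shifted 4-element input windows from the current block (vi2x4567) and a carry register (vi2x3012) instead of issuing unaligned reloads. The standalone sketch below reproduces that idiom with illustrative input values; the harness (main, the _mm_setr_ps/_mm_storeu_ps/printf calls) is added here for demonstration only and is not part of the kernels, whereas the three intrinsic statements mirror the listed lines.

#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  // Current block: input elements 4..7 in lanes 0..3 (illustrative values).
  const __m128 vi2x4567 = _mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);
  // Carry from the previous block: element 3 in lane 0, elements 0..2 behind it.
  __m128 vi2x3012 = _mm_setr_ps(3.0f, 0.0f, 1.0f, 2.0f);

  // Rotate the block right by one lane: lanes become [7, 4, 5, 6].
  const __m128 vi2x7456 = _mm_shuffle_ps(vi2x4567, vi2x4567, _MM_SHUFFLE(2, 1, 0, 3));
  // Pull the carried element 3 into lane 0: [3, 4, 5, 6], the window shifted by one.
  const __m128 vi2x3456 = _mm_move_ss(vi2x7456, vi2x3012);
  // Blend carry and rotated block: [2, 3, 4, 5], the window shifted by two
  // (only the 5x5p2 kernels need this, since their padding is 2).
  const __m128 vi2x2345 = _mm_shuffle_ps(vi2x3012, vi2x7456, _MM_SHUFFLE(2, 1, 0, 3));
  // The kernels then carry the rotated block into the next iteration:
  vi2x3012 = vi2x7456;

  float out[4];
  _mm_storeu_ps(out, vi2x3456);
  printf("vi2x3456 = %g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 3 4 5 6
  _mm_storeu_ps(out, vi2x2345);
  printf("vi2x2345 = %g %g %g %g\n", out[0], out[1], out[2], out[3]);  // 2 3 4 5
  return 0;
}

In the kernels themselves, each shifted window is multiplied by one filter tap and accumulated into the output row, which is why every file repeats the same few statements once per processed input row.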