/external/XNNPACK/src/f32-dwconv2d-chw/gen/

3x3p1-minmax-sse-1x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4())
  115  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  122  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  176  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  183  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-1x4-acc3.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3())
  115  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  122  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  178  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  185  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-1x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2())
  115  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  122  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  177  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  184  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-1x4-acc4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4())
  115  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  122  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  179  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  186  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4())
  139  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  148  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  223  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  232  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2())
  139  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  148  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  225  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  234  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
5x5p2-minmax-sse-1x4-acc4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4())
  148  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  165  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  248  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  265  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  342  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  354  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-1x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4())
  148  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  165  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  245  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  262  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  336  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  348  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-1x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2())
  148  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  165  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  246  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  263  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  338  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  350  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-1x4-acc3.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3())
  148  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  165  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  247  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  264  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  340  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  352  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-1x4-acc5.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5())
  148  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  165  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  249  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  266  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  344  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  356  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
3x3p1-minmax-sse-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4())
  163  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  174  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  270  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  281  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4())
  187  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  200  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  317  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  330  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-2x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4())
  173  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  197  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  309  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  333  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  437  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  455  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-5x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4())
  211  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  226  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  364  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  379  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-2x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2())
  173  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  197  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  311  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  335  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  441  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  459  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-2x4-acc3.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3())
  173  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  197  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  313  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  337  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  445  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  463  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

3x3p1-minmax-sse-6x4.c  (in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4())
  235  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  252  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  411  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  428  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
5x5p2-minmax-sse-3x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2())
  198  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  229  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  376  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  407  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  544  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  568  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-3x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4())
  198  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  229  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  373  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  404  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  538  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  562  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-4x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4())
  223  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  261  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  437  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  475  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  639  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  669  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-4x4-acc2.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2())
  223  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  261  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  441  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  479  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  647  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  677  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

5x5p2-minmax-sse-5x4.c  (in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4())
  248  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  293  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  501  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);   [local]
  546  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
  740  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vzero);   [local]
  776  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));
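Every hit above is the same two-instruction SSE idiom. The generated kernels name vectors after the input columns they hold, so vi0x4567 carries columns 4-7 of input row i0, and the kernel also needs that window shifted right by one column (vi0x5678) for the neighbouring filter tap. _mm_move_ss() pulls lane 0 of the next block (vi0x89AB in the steady state, vzero at the zero-padded right edge of the row) into lane 0 of the current block, and the _MM_SHUFFLE(0, 3, 2, 1) self-shuffle then rotates the lanes into order. The standalone sketch below only reproduces this lane movement, not the XNNPACK kernels themselves; the input values, the *_tail names, and the main() harness are invented for illustration (the real kernels reuse the vi0x8567/vi0x5678 names in their row-tail blocks).

/*
 * Minimal sketch of the vi0x4567 -> vi0x5678 shift used by the kernels
 * listed above. Not XNNPACK code; demo values only.
 * Build with e.g.:  gcc -msse -O2 shift_demo.c -o shift_demo
 */
#include <stdio.h>
#include <xmmintrin.h>

int main(void) {
  /* _mm_setr_ps() fills lanes low-to-high, so vi0x4567 = {4, 5, 6, 7}. */
  const __m128 vi0x4567 = _mm_setr_ps(4.0f, 5.0f, 6.0f, 7.0f);
  const __m128 vi0x89AB = _mm_setr_ps(8.0f, 9.0f, 10.0f, 11.0f);
  const __m128 vzero    = _mm_setzero_ps();

  /* Steady state: move column 8 (lane 0 of the next block) into lane 0,
   * giving {8, 5, 6, 7}, then pick lanes (1, 2, 3, 0) -> {5, 6, 7, 8}. */
  const __m128 vi0x8567 = _mm_move_ss(vi0x4567, vi0x89AB);
  const __m128 vi0x5678 = _mm_shuffle_ps(vi0x8567, vi0x8567, _MM_SHUFFLE(0, 3, 2, 1));

  /* Row tail: no block remains to the right, so shift in the zero padding
   * instead, giving {5, 6, 7, 0}. */
  const __m128 vi0x8567_tail = _mm_move_ss(vi0x4567, vzero);
  const __m128 vi0x5678_tail = _mm_shuffle_ps(vi0x8567_tail, vi0x8567_tail, _MM_SHUFFLE(0, 3, 2, 1));

  float a[4], b[4];
  _mm_storeu_ps(a, vi0x5678);
  _mm_storeu_ps(b, vi0x5678_tail);
  printf("steady state: {%g, %g, %g, %g}\n", a[0], a[1], a[2], a[3]);
  printf("row tail:     {%g, %g, %g, %g}\n", b[0], b[1], b[2], b[3]);
  return 0;
}

These kernels target baseline SSE, which has no single lane-splice instruction, so the two-step _mm_move_ss()/_mm_shuffle_ps() sequence is how the shifted window is assembled. The [local] tag on the _mm_move_ss() lines marks them as the declaration sites of vi0x8567 in each function.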