/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D | 3x3p1-minmax-sse-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4():
    117  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    124  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    178  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    185  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc3():
    117  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    124  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    180  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    187  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc2():
    117  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    124  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    179  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    186  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-1x4-acc4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_1x4_acc4():
    117  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    124  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    181  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    188  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4():
    141  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    150  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    225  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    234  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_2x4_acc2():
    141  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    150  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    227  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    236  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-1x4-acc4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc4():
    150  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    166  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    250  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    266  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    343  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    355  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-1x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4():
    150  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    166  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    247  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    263  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    337  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    349  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-1x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc2():
    150  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    166  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    248  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    264  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    339  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    351  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-1x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc3():
    150  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    166  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    249  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    265  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    341  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    353  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-1x4-acc5.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_1x4_acc5():
    150  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    166  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    251  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    267  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    345  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    357  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_3x4():
    165  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    176  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    272  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    283  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4():
    189  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    202  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    319  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    332  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-2x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4():
    175  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    198  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    311  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    334  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    438  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    456  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4():
    213  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    228  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    366  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    381  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-2x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc2():
    175  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    198  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    313  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    336  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    442  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    460  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-2x4-acc3.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_2x4_acc3():
    175  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    198  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    315  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    338  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    446  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    464  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 3x3p1-minmax-sse-6x4.c | in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4():
    237  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    254  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    413  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    430  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-3x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4_acc2():
    200  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    230  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    378  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    408  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    545  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    569  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-3x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_3x4():
    200  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    230  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    375  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    405  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    539  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    563  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-4x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4():
    225  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    262  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    439  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    476  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    640  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    670  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-4x4-acc2.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2():
    225  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    262  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    443  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    480  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    648  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    678  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

D | 5x5p2-minmax-sse-5x4.c | in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4():
    250  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    294  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    503  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);  local
    547  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));
    741  const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vzero);  local
    777  const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

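Every match above is the same two-instruction SSE idiom: _mm_move_ss splices the first element of the next group of four inputs (or of vzero in the row-remainder path) into the current group, and _mm_shuffle_ps with _MM_SHUFFLE(0, 3, 2, 1) rotates the lanes so the kernel obtains the input row shifted left by one element (vi1x5678 built from vi1x4567 and vi1x89AB). The sketch below is not XNNPACK code; it only reproduces that idiom with illustrative input values, and the vi1x567z / vi1x567_tail names are made up for the example.

  // Standalone sketch (not from XNNPACK): the _mm_move_ss + _mm_shuffle_ps
  // idiom used by the kernels above to shift a row of inputs left by one
  // element across two 4-wide registers.
  #include <stdio.h>
  #include <xmmintrin.h>

  int main(void) {
    // Eight consecutive input values from one row; the numbers are only
    // illustrative.
    const float row[8] = {4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f, 10.0f, 11.0f};
    const __m128 vi1x4567 = _mm_loadu_ps(row);      // {4, 5, 6, 7}
    const __m128 vi1x89AB = _mm_loadu_ps(row + 4);  // {8, 9, 10, 11}

    // _mm_move_ss takes lane 0 from the second operand and lanes 1..3 from
    // the first: {8, 5, 6, 7}.
    const __m128 vi1x8567 = _mm_move_ss(vi1x4567, vi1x89AB);
    // _MM_SHUFFLE(0, 3, 2, 1) picks lanes 1, 2, 3, 0 of vi1x8567, which is
    // the row shifted by one element: {5, 6, 7, 8}.
    const __m128 vi1x5678 = _mm_shuffle_ps(vi1x8567, vi1x8567, _MM_SHUFFLE(0, 3, 2, 1));

    // In the remainder path the kernels pass vzero instead of vi1x89AB, so a
    // zero is shifted in from the right (implicit zero padding at the row end).
    // vi1x567_tail and vi1x567z are illustrative names, not kernel variables.
    const __m128 vzero = _mm_setzero_ps();
    const __m128 vi1x567_tail = _mm_move_ss(vi1x4567, vzero);
    const __m128 vi1x567z = _mm_shuffle_ps(vi1x567_tail, vi1x567_tail, _MM_SHUFFLE(0, 3, 2, 1));

    float out[4];
    _mm_storeu_ps(out, vi1x5678);
    printf("shifted:            {%g, %g, %g, %g}\n", out[0], out[1], out[2], out[3]);
    _mm_storeu_ps(out, vi1x567z);
    printf("shifted, zero fill: {%g, %g, %g, %g}\n", out[0], out[1], out[2], out[3]);
    return 0;
  }

Using two SSE1 instructions per shifted vector presumably keeps these kernels within baseline SSE, since a single-instruction alternative such as palignr (_mm_alignr_epi8) would require SSSE3.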