/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5s2p2-minmax-neonfma-3x4-acc2.c | 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 251 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 417 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 434 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
|
D | 5x5s2p2-minmax-neon-3x4.c | 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 251 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 414 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 431 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
|
D | 5x5s2p2-minmax-neonfma-3x4.c | 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 251 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 414 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 431 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
|
D | 5x5s2p2-minmax-neon-3x4-acc2.c | 222 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 251 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 417 const float32x4_t vi7x79BD = vextq_f32(vi7x1357, vi7x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 434 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x79BD, vget_low_f32(vwGHIJ), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c | 282 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local 348 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() 522 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local 539 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c | 282 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local 348 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() 519 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local 536 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c | 282 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local 348 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() 522 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local 539 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4.c | 282 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local 348 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() 519 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local 536 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
|
D | 5x5s2p2-minmax-sse-3x4.c | 310 const __m128 vi7x79BD = _mm_move_ss(vi7xF9BD, vi7x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local 354 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() 528 const __m128 vi7x79BD = _mm_move_ss(vi7xF9BD, vi7x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local 542 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
|
D | 5x5s2p2-minmax-sse-3x4-acc2.c | 310 const __m128 vi7x79BD = _mm_move_ss(vi7xF9BD, vi7x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local 354 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() 531 const __m128 vi7x79BD = _mm_move_ss(vi7xF9BD, vi7x7135); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local 545 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x79BD, vk31)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c | 262 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local 328 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() 502 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local 519 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-3x4-acc2.c | 262 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local 328 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() 502 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local 519 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-3x4.c | 262 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local 328 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() 499 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local 516 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-3x4.c | 262 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local 328 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() 499 const v128_t vi7x79BD = wasm_v32x4_shuffle(vi7x1357, vi7x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local 516 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x79BD, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
|