/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-scalar-5x1.c | 111 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 197 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
|
D | 3x3p1-minmax-scalar-6x1.c | 120 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 220 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-5x4.c | 104 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local 226 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-5x4.c | 104 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local 226 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c | 137 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local 252 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
|
D | 3x3p1-minmax-ssse3-5x4.c | 130 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local 246 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c | 137 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local 252 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
|
D | 3x3p1-minmax-neon-5x4.c | 101 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() local 224 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_5x4() local
|
D | 3x3p1-minmax-neonfma-5x4.c | 101 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() local 224 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_5x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-6x4.c | 112 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local 252 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_6x4() local
|
D | 3x3p1-minmax-neon-6x4.c | 109 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local 250 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_6x4() local
|
D | 3x3p1-minmax-neonfma-6x4.c | 109 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local 250 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_6x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-6x4.c | 112 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local 252 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_6x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c | 148 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local 281 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c | 148 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local 281 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
|
D | 3x3p1-minmax-ssse3-6x4.c | 141 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local 275 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
|
D | 5x5p2-minmax-wasmsimd-x86-splat-5x4.c | 117 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 361 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 618 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
|
D | 5x5p2-minmax-wasmsimd-arm-splat-5x4.c | 117 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 361 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 618 v128_t vo4p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
|
D | 3x3p1-minmax-sse-5x4.c | 159 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local 319 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
|
D | 5x5p2-minmax-neonfma-5x4.c | 114 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 359 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 617 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
|
D | 5x5p2-minmax-neon-5x4.c | 114 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 359 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 617 float32x4_t vo4p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
|
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 143 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 387 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 644 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c | 143 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 387 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 644 v128_t vo4p0 = vbias; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
|
D | 3x3p1-minmax-sse-6x4.c | 174 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local 358 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_6x4() local
|
D | 5x5p2-minmax-sse-5x4.c | 142 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk02)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 395 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk02)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 661 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk02)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
|