/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-scalar-3x1.c | 135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 262 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local 367 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
|
D | 3x3p1-minmax-scalar-3x1.c | 91 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local 149 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
|
D | 5x5p2-minmax-scalar-3x1-acc2.c | 135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 265 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local 373 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
|
D | 3x3p1-minmax-scalar-4x1.c | 100 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local 172 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
|
D | 5x5s2p2-minmax-scalar-3x1.c | 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 304 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local 381 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
|
D | 3x3s2p1-minmax-scalar-3x1.c | 104 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local 181 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-3x4.c | 86 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local 172 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_3x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-3x4.c | 86 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local 172 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_3x4() local
|
D | 3x3p1-minmax-scalar-5x1.c | 109 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local 195 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
|
D | 3x3p1-minmax-neonfma-3x4.c | 83 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local 170 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_3x4() local
|
D | 3x3p1-minmax-neon-3x4.c | 83 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local 170 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_3x4() local
|
D | 5x5s2p2-minmax-scalar-3x1-acc2.c | 170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 307 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local 387 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
|
D | 3x3p1-minmax-ssse3-3x4.c | 106 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local 186 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_3x4() local
|
D | 3x3p1-minmax-wasmsimd-x86-splat-4x4.c | 94 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local 198 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-splat-4x4.c | 94 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4() local 198 v128_t vo2p0 = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4() local
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-3x4.c | 113 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local 192 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_3x4() local
|
D | 3x3p1-minmax-scalar-6x1.c | 118 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local 218 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-3x4.c | 113 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local 192 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_3x4() local
|
D | 3x3s2p1-minmax-neonfma-3x4.c | 94 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local 173 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
|
D | 3x3p1-minmax-neonfma-4x4.c | 91 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() local 196 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() local
|
D | 3x3p1-minmax-ssse3-4x4.c | 117 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local 215 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
|
D | 3x3s2p1-minmax-scalar-4x1.c | 118 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local 215 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
|
D | 3x3s2p1-minmax-neon-3x4.c | 94 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local 173 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
|
D | 3x3p1-minmax-neon-4x4.c | 91 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() local 196 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() local
|
D | 5x5p2-minmax-neonfma-3x4.c | 96 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local 269 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local 453 float32x4_t vo2p0 = vdupq_lane_f32(vget_low_f32(vw0123), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_3x4() local
|