Home
last modified time | relevance | path

Searched refs:vo1x0 (Results 1 – 4 of 4) sorted by relevance

/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-neonfma-2x2.c105 float32x4_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
119 vo1x0 = vfmaq_laneq_f32(vo1x0, vk00c0, vi2x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
126 vo1x0 = vfmaq_laneq_f32(vo1x0, vk10c0, vi3x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
133 vo1x0 = vfmaq_laneq_f32(vo1x0, vk20c0, vi4x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
147 vo1x0 = vfmaq_laneq_f32(vo1x0, vk00c1, vi2x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
154 vo1x0 = vfmaq_laneq_f32(vo1x0, vk10c1, vi3x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
161 vo1x0 = vfmaq_laneq_f32(vo1x0, vk20c1, vi4x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
168 vo1x0 = vfmaq_laneq_f32(vo1x0, vk00c2, vi2x0, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
175 vo1x0 = vfmaq_laneq_f32(vo1x0, vk10c2, vi3x0, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
182 vo1x0 = vfmaq_laneq_f32(vo1x0, vk20c2, vi4x0, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
[all …]
D3x3s2p1c3x4-neon-2x2.c105 float32x4_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
119 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c0, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
126 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c0, vget_low_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
133 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c0, vget_low_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
147 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c1, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
154 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c1, vget_high_f32(vi3x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
161 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c1, vget_high_f32(vi4x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
168 vo1x0 = vmlaq_lane_f32(vo1x0, vk00c2, vget_high_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
175 vo1x0 = vmlaq_lane_f32(vo1x0, vk10c2, vget_high_f32(vi3x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
182 vo1x0 = vmlaq_lane_f32(vo1x0, vk20c2, vget_high_f32(vi4x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
[all …]
D3x3s2p1c3x4-wasmsimd-2x2.c106 v128_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
120vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
127vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk10c0, wasm_v32x4_shuffle(vi3x0, vi3x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
134vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi4x0, vi4x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
148vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c1, wasm_v32x4_shuffle(vi2x0, vi2x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
155vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi3x0, vi3x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
162vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi4x0, vi4x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
169vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk00c2, wasm_v32x4_shuffle(vi2x0, vi2x0, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
176vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk10c2, wasm_v32x4_shuffle(vi3x0, vi3x0, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
183vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk20c2, wasm_v32x4_shuffle(vi4x0, vi4x0, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]
D3x3s2p1c3x4-sse-2x2.c105 __m128 vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
119vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk00c0, _mm_shuffle_ps(vi2x0, vi2x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
126vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk10c0, _mm_shuffle_ps(vi3x0, vi3x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
133vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi4x0, vi4x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
147vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk00c1, _mm_shuffle_ps(vi2x0, vi2x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
154vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi3x0, vi3x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
161vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi4x0, vi4x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
168vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk00c2, _mm_shuffle_ps(vi2x0, vi2x0, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
175vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk10c2, _mm_shuffle_ps(vi3x0, vi3x0, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
182vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk20c2, _mm_shuffle_ps(vi4x0, vi4x0, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
[all …]