Home
last modified time | relevance | path

Searched refs:vk01c0 (Results 1 – 4 of 4) sorted by relevance

/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-neonfma-2x2.c186 const float32x4_t vk01c0 = vld1q_f32(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
188 vo0x0 = vfmaq_laneq_f32(vo0x0, vk01c0, vi0x1, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
189 vo1x0 = vfmaq_laneq_f32(vo1x0, vk01c0, vi2x1, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
190 vo0x1 = vfmaq_laneq_f32(vo0x1, vk01c0, vi0x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
191 vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c0, vi2x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
448 const float32x4_t vk01c0 = vld1q_f32(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
450 vo0x0 = vfmaq_laneq_f32(vo0x0, vk01c0, vi0x1, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
451 vo1x0 = vfmaq_laneq_f32(vo1x0, vk01c0, vi2x1, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
453 vo0x1 = vfmaq_laneq_f32(vo0x1, vk01c0, vi0x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
454 vo1x1 = vfmaq_laneq_f32(vo1x1, vk01c0, vi2x2, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
D3x3s2p1c3x4-neon-2x2.c186 const float32x4_t vk01c0 = vld1q_f32(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
188 vo0x0 = vmlaq_lane_f32(vo0x0, vk01c0, vget_low_f32(vi0x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
189 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c0, vget_low_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
190 vo0x1 = vmlaq_lane_f32(vo0x1, vk01c0, vget_high_f32(vi0x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
191 vo1x1 = vmlaq_lane_f32(vo1x1, vk01c0, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
446 const float32x4_t vk01c0 = vld1q_f32(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
448 vo0x0 = vmlaq_lane_f32(vo0x0, vk01c0, vget_low_f32(vi0x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
449 vo1x0 = vmlaq_lane_f32(vo1x0, vk01c0, vget_low_f32(vi2x1), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
451 vo0x1 = vmlaq_lane_f32(vo0x1, vk01c0, vget_high_f32(vi0x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
452 vo1x1 = vmlaq_lane_f32(vo1x1, vk01c0, vget_high_f32(vi2x2), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
D3x3s2p1c3x4-wasmsimd-2x2.c187 const v128_t vk01c0 = wasm_v128_load(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
189 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi0x1, vi0x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
190 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x1, vi2x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
191 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi0x2, vi0x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
192 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
448 const v128_t vk01c0 = wasm_v128_load(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
450 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi0x1, vi0x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
451 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x1, vi2x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
453 …vo0x1 = wasm_f32x4_add(vo0x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi0x2, vi0x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
454 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk01c0, wasm_v32x4_shuffle(vi2x2, vi2x2, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
D3x3s2p1c3x4-sse-2x2.c186 const __m128 vk01c0 = _mm_load_ps(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
188 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi0x1, vi0x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
189 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x1, vi2x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
190 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi0x2, vi0x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
191 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
448 const __m128 vk01c0 = _mm_load_ps(w + 40); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
450 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi0x1, vi0x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
451 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x1, vi2x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
453 …vo0x1 = _mm_add_ps(vo0x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi0x2, vi0x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
454 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk01c0, _mm_shuffle_ps(vi2x2, vi2x2, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()