/external/XNNPACK/src/f32-conv-hwc2chw/ |
D | 3x3s2p1c3x4-neonfma-2x2.c | 104 float32x4_t vo0x0 = vld1q_f32(w); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local 105 float32x4_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 106 float32x4_t vo0x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 107 float32x4_t vo1x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 118 vo0x0 = vfmaq_laneq_f32(vo0x0, vk00c0, vi0x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 125 vo0x0 = vfmaq_laneq_f32(vo0x0, vk10c0, vi1x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 132 vo0x0 = vfmaq_laneq_f32(vo0x0, vk20c0, vi2x0, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 146 vo0x0 = vfmaq_laneq_f32(vo0x0, vk00c1, vi0x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 153 vo0x0 = vfmaq_laneq_f32(vo0x0, vk10c1, vi1x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() 160 vo0x0 = vfmaq_laneq_f32(vo0x0, vk20c1, vi2x0, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() [all …]
|
D | 3x3s2p1c3x4-neon-2x2.c | 104 float32x4_t vo0x0 = vld1q_f32(w); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local 105 float32x4_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 106 float32x4_t vo0x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 107 float32x4_t vo1x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 118 vo0x0 = vmlaq_lane_f32(vo0x0, vk00c0, vget_low_f32(vi0x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 125 vo0x0 = vmlaq_lane_f32(vo0x0, vk10c0, vget_low_f32(vi1x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 132 vo0x0 = vmlaq_lane_f32(vo0x0, vk20c0, vget_low_f32(vi2x0), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 146 vo0x0 = vmlaq_lane_f32(vo0x0, vk00c1, vget_high_f32(vi0x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 153 vo0x0 = vmlaq_lane_f32(vo0x0, vk10c1, vget_high_f32(vi1x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() 160 vo0x0 = vmlaq_lane_f32(vo0x0, vk20c1, vget_high_f32(vi2x0), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() [all …]
|
D | 3x3s2p1c3x4-wasmsimd-2x2.c | 105 v128_t vo0x0 = wasm_v128_load(w); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local 106 v128_t vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 107 v128_t vo0x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 108 v128_t vo1x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 119 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk00c0, wasm_v32x4_shuffle(vi0x0, vi0x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 126 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk10c0, wasm_v32x4_shuffle(vi1x0, vi1x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 133 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi2x0, vi2x0, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 147 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk00c1, wasm_v32x4_shuffle(vi0x0, vi0x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 154 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk10c1, wasm_v32x4_shuffle(vi1x0, vi1x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() 161 …vo0x0 = wasm_f32x4_add(vo0x0, wasm_f32x4_mul(vk20c1, wasm_v32x4_shuffle(vi2x0, vi2x0, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() [all …]
|
D | 3x3s2p1c3x4-sse-2x2.c | 104 __m128 vo0x0 = _mm_load_ps(w); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local 105 __m128 vo1x0 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 106 __m128 vo0x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 107 __m128 vo1x1 = vo0x0; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 118 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk00c0, _mm_shuffle_ps(vi0x0, vi0x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 125 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk10c0, _mm_shuffle_ps(vi1x0, vi1x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 132 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi2x0, vi2x0, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 146 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk00c1, _mm_shuffle_ps(vi0x0, vi0x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 153 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk10c1, _mm_shuffle_ps(vi1x0, vi1x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() 160 …vo0x0 = _mm_add_ps(vo0x0, _mm_mul_ps(vk20c1, _mm_shuffle_ps(vi2x0, vi2x0, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() [all …]
|