/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c | 241 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 272 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 495 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 526 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() 734 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local 764 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
|
D | 5x5p2-minmax-neonfma-5x4.c | 212 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 243 vo4p0 = vfmaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() 467 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 498 vo4p0 = vfmaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() 707 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local 737 vo4p0 = vfmaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
|
D | 5x5p2-minmax-neon-5x4.c | 212 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 243 vo4p0 = vmlaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() 467 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 498 vo4p0 = vmlaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() 707 const float32x4_t vi8x2345 = vextq_f32(vi8x0123, vi8x4567, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local 737 vo4p0 = vmlaq_lane_f32(vo4p0, vi8x2345, vget_low_f32(vwKLMN), 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
|
D | 5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c | 241 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 272 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 495 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 526 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() 734 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local 764 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
|
D | 5x5p2-minmax-sse-5x4.c | 245 const __m128 vi8x2345 = _mm_shuffle_ps(vi8x3012, vi8x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 291 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 498 const __m128 vi8x2345 = _mm_shuffle_ps(vi8x3012, vi8x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 544 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() 737 const __m128 vi8x2345 = _mm_shuffle_ps(vi8x3012, vi8x7456, _MM_SHUFFLE(2, 1, 0, 3)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local 774 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi8x2345, vk40)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
|
D | 5x5p2-minmax-wasmsimd-x86-splat-5x4.c | 215 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 246 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() 469 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 500 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() 708 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local 738 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
|
D | 5x5p2-minmax-wasmsimd-arm-splat-5x4.c | 215 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 246 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() 469 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 500 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() 708 const v128_t vi8x2345 = wasm_v32x4_shuffle(vi8x0123, vi8x4567, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local 738 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi8x2345, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
|