Home
last modified time | relevance | path

Searched refs:vi7x6789 (Results 1 – 21 of 21) sorted by relevance

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-wasmsimd-x86-loadsplat-5x4.c328 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
355 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
360 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
582 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
609 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
614 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
813 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4() local
838 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
843 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_5x4()
D5x5p2-minmax-neonfma-5x4.c299 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
326 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
331 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
554 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
581 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
586 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
787 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4() local
812 vo4p0 = vfmaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
817 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_5x4()
D5x5p2-minmax-neon-5x4.c299 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
326 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
331 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
554 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
581 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
586 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
787 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4() local
812 vo4p0 = vmlaq_lane_f32(vo4p0, vi7x6789, vget_low_f32(vwKLMN), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
817 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_5x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-5x4.c328 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
355 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
360 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
582 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
609 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
614 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
813 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4() local
838 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
843 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_5x4()
D5x5p2-minmax-sse-5x4.c336 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
358 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
362 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
589 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
611 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
615 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
819 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vzero, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4() local
841 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi7x6789, vk34)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
845 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_5x4()
D5x5p2-minmax-wasmsimd-x86-splat-5x4.c302 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
329 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
334 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
556 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
583 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
588 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
787 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
812 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
817 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
D5x5p2-minmax-wasmsimd-arm-splat-5x4.c302 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
329 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
334 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
556 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
583 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
588 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
787 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
812 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwKLMN, vwKLMN, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
817 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
D5x5p2-minmax-neon-4x4.c265 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
291 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
483 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
509 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
681 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4() local
706 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4()
D5x5p2-minmax-neon-4x4-acc2.c265 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
291 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
487 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
513 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
689 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2() local
714 vo3p0 = vmlaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neon_4x4_acc2()
D5x5p2-minmax-neonfma-4x4.c265 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
291 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
483 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
509 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
681 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4() local
706 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4()
D5x5p2-minmax-neonfma-4x4-acc2.c265 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
291 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
487 const float32x4_t vi7x6789 = vextq_f32(vi7x4567, vi7x89AB, 2); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
513 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
689 const float32x4_t vi7x6789 = vextq_f32(vi7x5678, vzero, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2() local
714 vo3p0 = vfmaq_lane_f32(vo3p0, vi7x6789, vwOP, 1); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__neonfma_4x4_acc2()
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4-acc2.c294 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
320 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
515 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
541 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
715 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2() local
740 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4_acc2()
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4.c294 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
320 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
511 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
537 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
707 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4() local
732 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4()
D5x5p2-minmax-wasmsimd-arm-loadsplat-4x4-acc2.c294 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
320 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
515 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
541 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
715 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2() local
740 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_loadsplat_4x4_acc2()
D5x5p2-minmax-wasmsimd-x86-loadsplat-4x4.c294 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
320 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
511 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
537 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
707 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4() local
732 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_loadsplat_4x4()
D5x5p2-minmax-sse-4x4.c298 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
319 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
512 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
533 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
706 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vzero, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4() local
727 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4()
D5x5p2-minmax-sse-4x4-acc2.c298 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
319 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
516 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vi7x89AB, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
537 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
714 const __m128 vi7x6789 = _mm_shuffle_ps(vi7x5678, vzero, _MM_SHUFFLE(1, 0, 2, 1)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2() local
735 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi7x6789, vk44)); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__sse_4x4_acc2()
D5x5p2-minmax-wasmsimd-arm-splat-4x4-acc2.c268 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2() local
294 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
489 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2() local
515 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
689 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2() local
714 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
D5x5p2-minmax-wasmsimd-arm-splat-4x4.c268 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4() local
294 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
485 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4() local
511 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
681 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4() local
706 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
D5x5p2-minmax-wasmsimd-x86-splat-4x4.c268 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4() local
294 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
485 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4() local
511 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
681 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4() local
706 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
D5x5p2-minmax-wasmsimd-x86-splat-4x4-acc2.c268 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2() local
294 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
489 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x4567, vi7x89AB, 2, 3, 4, 5); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2() local
515 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
689 const v128_t vi7x6789 = wasm_v32x4_shuffle(vi7x5678, vzero, 1, 2, 3, 4); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2() local
714 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi7x6789, wasm_v32x4_shuffle(vwOP, vwOP, 1, 1, 1, 1))… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()