/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5s2p2-minmax-neonfma-3x4-acc2.c | 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 202 vo2p1 = vfmaq_lane_f32(vo2p1, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 387 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 404 vo2p1 = vfmaq_lane_f32(vo2p1, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
|
D | 5x5s2p2-minmax-neon-3x4.c | 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 202 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 384 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 401 vo2p0 = vmlaq_lane_f32(vo2p0, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
|
D | 5x5s2p2-minmax-neonfma-3x4.c | 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 202 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 384 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 401 vo2p0 = vfmaq_lane_f32(vo2p0, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
|
D | 5x5s2p2-minmax-neon-3x4-acc2.c | 183 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 202 vo2p1 = vmlaq_lane_f32(vo2p1, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 387 const float32x4_t vi7x68AC = vextq_f32(vi7x0246, vi7x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 404 vo2p1 = vmlaq_lane_f32(vo2p1, vi7x68AC, vget_low_f32(vwGHIJ), 0); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c | 243 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local 262 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() 492 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local 509 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c | 243 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local 262 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() 489 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local 506 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c | 243 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local 262 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() 492 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local 509 vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4.c | 243 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local 262 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() 489 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4() local 506 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4()
|
D | 5x5s2p2-minmax-sse-3x4.c | 237 const __m128 vi7x68AC = _mm_move_ss(vi7xE8AC, vi7x6024); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local 263 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() 492 const __m128 vi7x68AC = _mm_move_ss(vi7xE8AC, vi7x6024); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4() local 516 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4()
|
D | 5x5s2p2-minmax-sse-3x4-acc2.c | 237 const __m128 vi7x68AC = _mm_move_ss(vi7xE8AC, vi7x6024); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local 263 vo2p1 = _mm_add_ps(vo2p1, _mm_mul_ps(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() 495 const __m128 vi7x68AC = _mm_move_ss(vi7xE8AC, vi7x6024); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2() local 519 vo2p1 = _mm_add_ps(vo2p1, _mm_mul_ps(vi7x68AC, vk30)); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c | 223 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local 242 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() 472 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local 489 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-3x4-acc2.c | 223 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local 242 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() 472 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local 489 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-3x4.c | 223 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local 242 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() 469 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local 486 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-3x4.c | 223 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local 242 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() 469 const v128_t vi7x68AC = wasm_v32x4_shuffle(vi7x0246, vi7x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local 486 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
|