/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3s2p1-minmax-neonfma-1x4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc2.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
|
D | 3x3s2p1-minmax-neon-1x4-acc2.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() 130 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
|
D | 3x3s2p1-minmax-neon-1x4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() 129 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
|
D | 3x3s2p1-minmax-neon-1x4-acc3.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-1x4-acc3.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() 131 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
|
D | 3x3s2p1-minmax-neon-1x4-acc4.c | 63 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local 83 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 84 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() 132 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4() 157 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2() 158 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4() 147 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc3.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3() 149 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc2.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 148 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-arm-splat-1x4-acc4.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4() 150 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc4() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc4() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc4() 160 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 159 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 158 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc4.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 150 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc3.c | 66 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local 99 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 100 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 149 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 157 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 160 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c | 76 v128_t vi2x1357 = vzero; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc3() local 109 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc3() 110 vi2x1357 = vi2x9BDF; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc3() 159 const v128_t vi2x7BDF = wasm_v32x4_shuffle(vi2x1357, vi2x9BDF, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_1x4_acc3()
|
D | 3x3s2p1-minmax-neonfma-2x4-acc2.c | 74 float32x4_t vi2x1357 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local 102 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x8ACE9BDF.val[1], 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 103 vi2x1357 = vi2x8ACE9BDF.val[1]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() 173 const float32x4_t vi2x7BDF = vextq_f32(vi2x1357, vi2x9BDF, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
|