/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 5x5s2p2-minmax-neon-2x4-acc2.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2() 311 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc2()
|
D | 5x5s2p2-minmax-neonfma-2x4-acc2.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2() 311 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc2()
|
D | 5x5s2p2-minmax-neon-2x4.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4() 309 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4()
|
D | 5x5s2p2-minmax-neon-2x4-acc3.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3() 313 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_2x4_acc3()
|
D | 5x5s2p2-minmax-neonfma-2x4-acc3.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3() 313 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4_acc3()
|
D | 5x5s2p2-minmax-neonfma-2x4.c | 89 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() local 154 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 155 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4() 309 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_2x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4() 396 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc2.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2() 398 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc2.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2() 398 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4() 396 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-2x4-acc3.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3() 400 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_2x4_acc3()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-2x4-acc3.c | 112 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() local 206 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() 207 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3() 400 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_2x4_acc3()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc2.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc2() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc2() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc2() 378 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-2x4.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4() 376 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-2x4.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4() 376 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-2x4-acc2.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc2() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc2() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc2() 378 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-arm-splat-2x4-acc3.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3() 380 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_2x4_acc3()
|
D | 5x5s2p2-minmax-wasmsimd-x86-splat-2x4-acc3.c | 92 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3() local 186 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3() 187 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3() 380 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_2x4_acc3()
|
D | 5x5s2p2-minmax-neonfma-3x4-acc2.c | 99 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() local 181 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 182 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2() 386 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4_acc2()
|
D | 5x5s2p2-minmax-neon-3x4.c | 99 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() local 181 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 182 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4() 383 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4()
|
D | 5x5s2p2-minmax-neonfma-3x4.c | 99 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() local 181 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 182 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4() 383 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neonfma_3x4()
|
D | 5x5s2p2-minmax-neon-3x4-acc2.c | 99 float32x4_t vi6x0246 = vmovq_n_f32(0.0f); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() local 181 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE9BDF.val[0], 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 182 vi6x0246 = vi6x8ACE9BDF.val[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2() 386 const float32x4_t vi6x68AC = vextq_f32(vi6x0246, vi6x8ACE, 3); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__neon_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4-acc2.c | 122 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() local 241 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() 242 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2() 491 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4_acc2()
|
D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-3x4.c | 122 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() local 241 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() 242 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4() 488 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_3x4()
|
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-3x4-acc2.c | 122 v128_t vi6x0246 = vzero; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() local 241 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() 242 vi6x0246 = vi6x8ACE; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2() 491 const v128_t vi6x68AC = wasm_v32x4_shuffle(vi6x0246, vi6x8ACE, 3, 4, 5, 6); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_3x4_acc2()
|