/external/XNNPACK/src/f32-dwconv2d-chw/gen/
References to vi0x8ACE (D: defined in the file, as a local variable):

D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc3())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    267  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    279  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    299  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    331  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

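All of these 5x5, stride-2 kernels hinge on the same three lane shuffles around vi0x8ACE. The sketch below (a hypothetical standalone snippet, not XNNPACK source; names follow the listing) restates them: in wasm_v32x4_shuffle, indices 0..3 select lanes of the first operand and 4..7 select lanes of the second.

#include <wasm_simd128.h>

// Pack the even-indexed columns {8,A,C,E} of two adjacent 4-wide loads
// {8,9,A,B} and {C,D,E,F}; these are the columns a stride-2 tap reads.
v128_t even_columns(v128_t vi0x89AB, v128_t vi0xCDEF) {
  return wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);
}

// Columns one step to the left, {6,8,A,C}: the last lane of the previous
// even-column vector followed by the first three of the current one.
v128_t shifted_left(v128_t vi0x0246, v128_t vi0x8ACE) {
  return wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
}

// Columns one step to the right, {A,C,E,G}: drop the first lane and pull
// in the first column of the next even-column vector.
v128_t shifted_right(v128_t vi0x8ACE, v128_t vi0xGIKM) {
  return wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
}

The assignments vi0x0246 = vi0x8ACE and vi0x8ACE = vi0xGIKM in the listing then slide this three-vector window along the row.
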
D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc2())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    266  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    278  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    298  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    330  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, vk02));
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    265  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    277  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, vk02));
    297  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    329  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, vk02));
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    265  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    277  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, vk02));
    297  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    329  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc4())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    268  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    280  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    300  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    332  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc4())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    248  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    260  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    280  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    312  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

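The loadsplat and splat variants differ only in how each tap's weight reaches a full vector. In the 5x5 CHW weight layout shown by these kernels, the packed weights begin with the bias, so lane 3 of the first weight vector holds k02; the listing's wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3) broadcasts exactly that lane. A hedged sketch of the two styles follows (w is a hypothetical pointer to the packed bias plus 25 weights; intrinsic names follow the older wasm_simd128.h revisions that XNNPACK's generated code uses):

#include <wasm_simd128.h>

// loadsplat style: broadcast the scalar weight k02 = w[3] at load time.
v128_t k02_loadsplat(const float* w) {
  return wasm_v32x4_load_splat(w + 3);
}

// splat style: load four packed weights once, then broadcast lane 3 in
// registers, trading scalar loads for shuffles.
v128_t k02_splat(const float* w) {
  const v128_t vw0123 = wasm_v128_load(w);
  return wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3);
}
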
D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc5.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc5())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    249  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    261  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    281  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    313  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc4())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    268  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    280  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    300  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    332  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc2.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc2())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    246  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    258  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    278  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    310  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc5.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc5())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    249  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    261  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    281  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    313  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc2.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc2())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    246  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    258  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    278  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    310  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4-acc3.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4_acc3())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    247  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    259  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    279  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    311  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3)));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    245  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    257  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3)));
    277  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    309  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-loadsplat-1x4-acc5.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_loadsplat_1x4_acc5())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    269  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    281  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    301  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    333  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc4())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    248  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    260  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    280  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    312  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc2())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    266  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    278  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    298  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    330  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-splat-1x4-acc3.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_1x4_acc3())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    247  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    259  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3));
    279  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    311  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc3())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    267  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    279  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    299  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    331  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-arm-splat-1x4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_1x4())
    103  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    118  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3)));
    138  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    139  vi0x0246 = vi0x8ACE;
    207  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    208  vi0x8ACE = vi0xGIKM;
    245  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    257  vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi0x8ACE, wasm_v32x4_shuffle(vw0123, vw0123, 3, 3, 3, 3)));
    277  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    309  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

D | 5x5s2p2-minmax-wasmsimd-x86-loadsplat-1x4-acc5.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_loadsplat_1x4_acc5())
    123  v128_t vi0x8ACE = wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, 6);   (definition, local)
    138  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    158  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    159  vi0x0246 = vi0x8ACE;
    227  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vi0xGIKM, 1, 2, 3, 4);
    228  vi0x8ACE = vi0xGIKM;
    269  vi0x8ACE = wasm_v128_and(vmask_even, vi0x8ACE);
    281  v128_t vo0p1 = wasm_f32x4_mul(vi0x8ACE, vk02);
    301  const v128_t vi0x68AC = wasm_v32x4_shuffle(vi0x0246, vi0x8ACE, 3, 4, 5, 6);
    333  const v128_t vi0xACEG = wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);

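Every wasm variant above ends the row the same way: the remainder block first zeroes the even columns that fall past the right edge, then substitutes zeros for the missing next block in the final right-shift. A minimal sketch, assuming vmask_even is the precomputed even-column tail mask these kernels take from their parameters:

#include <wasm_simd128.h>

// Zero the even columns that lie past the right edge of the row.
v128_t mask_tail(v128_t vi0x8ACE, v128_t vmask_even) {
  return wasm_v128_and(vmask_even, vi0x8ACE);
}

// Shift right as before, but with zero padding in place of the next
// block: produces {A,C,E,0}.
v128_t shifted_right_tail(v128_t vi0x8ACE) {
  const v128_t vzero = wasm_f32x4_splat(0.0f);
  return wasm_v32x4_shuffle(vi0x8ACE, vzero, 1, 2, 3, 4);
}
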
D | 5x5s2p2-minmax-sse-1x4-acc2.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc2())
    119  __m128 vi0x8ACE = _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));   (definition, local)
    132  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    138  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    222  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vi0xGIKM);
    223  vi0x8ACE = vi0xGIKM;
    258  vi0x8ACE = _mm_and_ps(vi0x8ACE, vmask_even);
    269  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    275  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    318  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vzero);

D | 5x5s2p2-minmax-sse-1x4-acc5.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc5())
    119  __m128 vi0x8ACE = _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));   (definition, local)
    132  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    138  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    222  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vi0xGIKM);
    223  vi0x8ACE = vi0xGIKM;
    261  vi0x8ACE = _mm_and_ps(vi0x8ACE, vmask_even);
    272  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    278  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    321  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vzero);

D | 5x5s2p2-minmax-sse-1x4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4())
    119  __m128 vi0x8ACE = _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));   (definition, local)
    132  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    138  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    222  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vi0xGIKM);
    223  vi0x8ACE = vi0xGIKM;
    257  vi0x8ACE = _mm_and_ps(vi0x8ACE, vmask_even);
    268  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    274  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    317  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vzero);

D | 5x5s2p2-minmax-sse-1x4-acc4.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc4())
    119  __m128 vi0x8ACE = _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));   (definition, local)
    132  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    138  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    222  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vi0xGIKM);
    223  vi0x8ACE = vi0xGIKM;
    260  vi0x8ACE = _mm_and_ps(vi0x8ACE, vmask_even);
    271  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    277  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    320  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vzero);

D | 5x5s2p2-minmax-sse-1x4-acc3.c  (all uses in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__sse_1x4_acc3())
    119  __m128 vi0x8ACE = _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));   (definition, local)
    132  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    138  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    222  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vi0xGIKM);
    223  vi0x8ACE = vi0xGIKM;
    259  vi0x8ACE = _mm_and_ps(vi0x8ACE, vmask_even);
    270  __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x8ACE, vk02));
    276  const __m128 vi0xE8AC = _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
    319  const __m128 vi0xGACE = _mm_move_ss(vi0x8ACE, vzero);

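The SSE kernels express the same windowing with _mm_shuffle_ps and _mm_move_ss, since SSE1 has no arbitrary two-operand lane shuffle. A standalone sketch of the three idioms from the listing (not XNNPACK source; comments on how the results are combined are inferred from the variable names):

#include <xmmintrin.h>

// _MM_SHUFFLE(2, 0, 2, 0) selects {a0, a2, b0, b2}: the even columns
// {8,A,C,E} of two adjacent 4-wide loads.
__m128 even_columns_sse(__m128 vi0x89AB, __m128 vi0xCDEF) {
  return _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0, 2, 0));
}

// Rotate to {E,8,A,C}; a following _mm_move_ss() can then replace the E
// lane with column 6 of the previous block to finish the left-shifted
// window {6,8,A,C}.
__m128 rotated_e8ac(__m128 vi0x8ACE) {
  return _mm_shuffle_ps(vi0x8ACE, vi0x8ACE, _MM_SHUFFLE(2, 1, 0, 3));
}

// {G,A,C,E}: lane 0 replaced by the first column of the next block (or
// by vzero at the right edge), to be rotated into {A,C,E,G} afterwards.
__m128 with_next_column(__m128 vi0x8ACE, __m128 vi0xGIKM) {
  return _mm_move_ss(vi0x8ACE, vi0xGIKM);
}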