Home
last modified time | relevance | path

Searched refs:vwGHIJ (Results 1 – 25 of 123) sorted by relevance

12345

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-wasmsimd-x86-splat-5x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4() local
147 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
148 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
149 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
150 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
151 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
187 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
188 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
189 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
190 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-5x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4() local
147 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
148 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
149 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
150 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
151 …vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi7x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
187 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
188 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
189 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
190 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_5x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-4x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2() local
134 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
135 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
136 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
137 …vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
168 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
169 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
170 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
171 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
210 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-4x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4() local
134 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
135 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
136 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
137 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
168 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
169 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
170 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
171 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
210 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-4x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4() local
134 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
135 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
136 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
137 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
168 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
169 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
170 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
171 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
210 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-4x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2() local
134 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
135 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
136 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
137 …vo3p1 = wasm_f32x4_add(vo3p1, wasm_f32x4_mul(vi6x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
168 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
169 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
170 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
171 …vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi6x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
210 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_4x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-3x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2() local
121 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
122 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
123 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
149 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
150 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
151 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
184 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
185 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
186 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-3x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4() local
121 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
122 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
123 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
149 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
150 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
151 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
184 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
185 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
186 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-3x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2() local
121 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
122 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
123 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
149 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
150 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
151 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
184 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
185 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
186 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi5x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_3x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-3x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4() local
121 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
122 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
123 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
149 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
150 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
151 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
184 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
185 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
186 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi5x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-2x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4() local
108 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
109 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
130 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
131 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
158 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
159 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
180 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
181 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
251 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-2x4-acc3.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3() local
108 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
109 …vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
130 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
131 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
158 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
159 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
180 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
181 …vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
255 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc3()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-2x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2() local
108 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
109 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
130 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
131 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
158 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
159 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
180 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
181 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
253 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-2x4-acc3.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3() local
108 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
109 …vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
130 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
131 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
158 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
159 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
180 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
181 …vo1p2 = wasm_f32x4_add(vo1p2, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
255 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc3()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-2x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4() local
108 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
109 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
130 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
131 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
158 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
159 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
180 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
181 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
251 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_2x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-2x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2() local
108 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
109 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
130 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
131 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
158 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
159 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi4x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
180 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
181 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi4x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
253 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_2x4_acc2()
[all …]
D5x5s2p2-minmax-wasmsimd-x86-splat-3x4-acc2.c44 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2() local
181 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
182 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
183 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
202 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
203 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
240 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
241 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
242 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4_acc2()
[all …]
D5x5s2p2-minmax-wasmsimd-arm-splat-3x4-acc2.c44 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2() local
181 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
182 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
183 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
202 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
203 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
240 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
241 …vo1p1 = wasm_f32x4_add(vo1p1, wasm_f32x4_mul(vi5x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
242 …vo2p1 = wasm_f32x4_add(vo2p1, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4_acc2()
[all …]
D5x5s2p2-minmax-wasmsimd-x86-splat-3x4.c44 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4() local
181 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
182 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
183 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
202 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
203 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
240 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
241 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
242 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_x86_splat_3x4()
[all …]
D5x5s2p2-minmax-wasmsimd-arm-splat-3x4.c44 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4() local
181 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
182 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
183 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x8ACE, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
202 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
203 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x9BDF, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
240 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
241 …vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi5x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
242 …vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi7x68AC, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__wasmsimd_arm_splat_3x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-1x4-acc4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4() local
95 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
111 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
132 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
148 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
204 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
220 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
241 …vo0p3 = wasm_f32x4_add(vo0p3, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
257 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
310 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc4()
[all …]
D5x5p2-minmax-wasmsimd-x86-splat-1x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4() local
95 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
111 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
132 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
148 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
217 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
238 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
254 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
304 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_x86_splat_1x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-1x4-acc2.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2() local
95 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
111 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
132 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
148 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
202 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
218 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
239 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
255 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
306 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc2()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-1x4.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4() local
95 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
111 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
132 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
148 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
201 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
217 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
238 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
254 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
304 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4()
[all …]
D5x5p2-minmax-wasmsimd-arm-splat-1x4-acc3.c42 const v128_t vwGHIJ = wasm_v128_load(weights + 16); in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3() local
95 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
111 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
132 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
148 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
203 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
219 …vo0p1 = wasm_f32x4_add(vo0p1, wasm_f32x4_mul(vi3x3456, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 1, 1, 1,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
240 …vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi3x2345, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 0, 0, 0,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
256 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x5678, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 3, 3, 3,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
308 …vo0p2 = wasm_f32x4_add(vo0p2, wasm_f32x4_mul(vi3x4567, wasm_v32x4_shuffle(vwGHIJ, vwGHIJ, 2, 2, 2,… in xnn_f32_dwconv2d_chw_ukernel_5x5p2__wasmsimd_arm_splat_1x4_acc3()
[all …]

12345