Home
last modified time | relevance | path

Searched refs:vk20 (Results 1 – 25 of 198) sorted by relevance

12345678

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-scalar-6x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
128 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
129 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
130 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
131 vo3p0 += vi5x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
132 vo4p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
133 vo5p0 += vi7x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
228 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
229 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
230 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
[all …]
D3x3p1-minmax-scalar-5x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
117 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
118 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
119 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
120 vo3p0 += vi5x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
121 vo4p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
203 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
204 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
205 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
206 vo3p0 += vi5x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
[all …]
D3x3p1-minmax-scalar-4x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
106 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
107 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
108 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
109 vo3p0 += vi5x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
178 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
179 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
180 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
181 vo3p0 += vi5x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3s2p1-minmax-scalar-4x1.c42 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
124 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
125 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
126 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
127 vo3p0 += vi8x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
221 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
222 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
223 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
224 vo3p0 += vi8x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D3x3p1-minmax-scalar-3x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
95 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
96 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
97 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
153 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
154 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
155 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3s2p1-minmax-scalar-3x1.c42 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
108 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
109 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
110 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
185 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
186 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
187 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D3x3p1-minmax-scalar-2x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
84 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
85 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
128 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
129 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
D3x3p1-minmax-scalar-2x1-acc2.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
84 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
85 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
130 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
131 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
D3x3s2p1-minmax-scalar-2x1.c42 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local
92 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
93 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
149 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
150 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
D3x3s2p1-minmax-scalar-2x1-acc2.c42 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local
92 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
93 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
151 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
152 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
D5x5p2-minmax-scalar-3x1.c45 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
139 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
140 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
141 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
266 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
267 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
268 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
371 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
372 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
373 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5p2-minmax-scalar-3x1-acc2.c45 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
139 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
140 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
141 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
269 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
270 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
271 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
377 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
378 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
379 vo2p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-3x1.c46 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
174 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
175 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
176 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
308 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
309 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
310 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
385 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
386 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
387 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c46 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
174 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
175 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
176 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
311 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
312 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
313 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
391 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
392 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
393 vo2p0 += vi6x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5p2-minmax-scalar-2x1.c45 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
126 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
127 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
221 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
222 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
300 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
301 vo1p0 += vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c49 const v128_t vk20 = wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
167 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
168 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
169 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
170 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
171 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
282 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
283 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
284 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
285 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c49 const v128_t vk20 = wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
184 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
185 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
186 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
187 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
188 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
189 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
317 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
318 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
319 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c49 const v128_t vk20 = wasm_v32x4_shuffle(vw4567, vw4567, 3, 3, 3, 3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
184 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
185 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
186 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
187 vo3p0 = wasm_f32x4_add(vo3p0, wasm_f32x4_mul(vi5x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
188 vo4p0 = wasm_f32x4_add(vo4p0, wasm_f32x4_mul(vi6x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
189 vo5p0 = wasm_f32x4_add(vo5p0, wasm_f32x4_mul(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
317 vo0p0 = wasm_f32x4_add(vo0p0, wasm_f32x4_mul(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
318 vo1p0 = wasm_f32x4_add(vo1p0, wasm_f32x4_mul(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
319 vo2p0 = wasm_f32x4_add(vo2p0, wasm_f32x4_mul(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
[all …]
D3x3p1-minmax-ssse3-6x4.c44 const __m128 vk20 = _mm_load1_ps(weights + 7); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
177 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
178 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
179 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
180 vo3p0 = _mm_add_ps(vo3p0, _mm_mul_ps(vi5x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
181 vo4p0 = _mm_add_ps(vo4p0, _mm_mul_ps(vi6x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
182 vo5p0 = _mm_add_ps(vo5p0, _mm_mul_ps(vi7x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
311 vo0p0 = _mm_add_ps(vo0p0, _mm_mul_ps(vi2x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
312 vo1p0 = _mm_add_ps(vo1p0, _mm_mul_ps(vi3x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
313 vo2p0 = _mm_add_ps(vo2p0, _mm_mul_ps(vi4x3456, vk20)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
D3x3p1-minmax-scalar-1x1-acc3.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
73 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
105 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
D3x3p1-minmax-scalar-1x1.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1() local
73 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
103 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1()
D3x3p1-minmax-scalar-1x1-acc2.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2() local
73 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
104 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc2()
D3x3p1-minmax-scalar-1x1-acc4.c41 const float vk20 = weights[7]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4() local
73 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4()
106 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-2x1.c46 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
150 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
151 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
248 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
249 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
302 vo0p0 += vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
303 vo1p0 += vi4x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5p2-minmax-scalar-2x1-acc3.c45 const float vk20 = weights[11]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
126 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
127 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
225 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
226 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
308 float vo0p2 = vi2x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
309 float vo1p2 = vi3x0 * vk20; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()

12345678