Home
last modified time | relevance | path

Searched refs:vi4x3 (Results 1 – 25 of 32) sorted by relevance

12

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-1x1-acc4.c102 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
151 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
157 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
211 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
217 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5p2-minmax-scalar-1x1-acc2.c102 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
151 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
157 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
209 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
215 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c102 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
151 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
157 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
210 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
216 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c102 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
151 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
157 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
208 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
214 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D5x5p2-minmax-scalar-2x1.c111 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
183 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
184 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
191 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
268 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
278 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
279 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
D5x5p2-minmax-scalar-3x1.c120 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
201 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
213 vo2p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
215 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
217 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
225 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
328 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
340 vo2p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
342 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
344 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5p2-minmax-scalar-1x1-acc5.c102 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
145 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
151 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
157 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
212 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
218 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D5x5p2-minmax-scalar-2x1-acc3.c111 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
183 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
184 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
191 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
272 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
282 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
283 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
D5x5p2-minmax-scalar-2x1-acc2.c111 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
173 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
183 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
184 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
191 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
270 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
280 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
281 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
D5x5p2-minmax-scalar-3x1-acc2.c120 float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
201 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
213 vo2p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
215 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
217 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
225 vi4x3 = vi4x4; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
331 vi4x2 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
343 vo2p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
345 vo1p0 += vi4x3 * vk33; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
347 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc4.c110 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
145 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
163 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
186 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4() local
210 vo0p3 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc4()
D5x5s2p2-minmax-scalar-1x1.c110 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
145 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
163 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
183 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1() local
207 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1()
D5x5s2p2-minmax-scalar-1x1-acc2.c110 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
145 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
163 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
184 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2() local
208 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc2()
D5x5s2p2-minmax-scalar-1x1-acc3.c110 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
145 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
163 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
185 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3() local
209 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc3()
D5x5s2p2-minmax-scalar-3x1.c143 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
214 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
248 vo2p0 += vi4x3 * vk03; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
253 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
258 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
296 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
352 vo2p0 += vi4x3 * vk03; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
357 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
362 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1-acc2.c143 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
214 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
248 vo2p1 += vi4x3 * vk03; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
253 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
258 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
299 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
355 vo2p1 += vi4x3 * vk03; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
360 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
365 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-2x1.c127 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
208 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
211 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
240 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
282 vo1p0 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
285 vo0p0 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c127 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
208 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
211 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
242 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
284 vo1p1 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
287 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1-acc3.c127 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
208 vo1p2 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
211 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
244 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
286 vo1p2 += vi4x3 * vk23; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
289 vo0p1 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D5x5s2p2-minmax-scalar-1x1-acc5.c110 const float vi4x3 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
145 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
163 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
187 const float vi4x3 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5() local
211 vo0p4 += vi4x3 * vk43; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_1x1_acc5()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c236 const v128_t vi4x3 = wasm_v128_load(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
255 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c2, wasm_v32x4_shuffle(vi4x3, vi4x3, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
276 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk22c0, wasm_v32x4_shuffle(vi4x3, vi4x3, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
297 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk22c1, wasm_v32x4_shuffle(vi4x3, vi4x3, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
318 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk22c2, wasm_v32x4_shuffle(vi4x3, vi4x3, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
324 vi4x0 = vi4x3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
508 v128_t vi4x3 = vzero; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
515 vi4x3 = wasm_v32x4_load_splat(i4 + 8); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
535 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk21c2, wasm_v32x4_shuffle(vi4x3, vi4x3, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
D3x3s2p1c3x4-sse-2x2.c235 const __m128 vi4x3 = _mm_loadu_ps(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
254 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c2, _mm_shuffle_ps(vi4x3, vi4x3, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
275 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk22c0, _mm_shuffle_ps(vi4x3, vi4x3, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
296 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk22c1, _mm_shuffle_ps(vi4x3, vi4x3, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
317 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk22c2, _mm_shuffle_ps(vi4x3, vi4x3, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
323 vi4x0 = vi4x3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
508 __m128 vi4x3 = _mm_setzero_ps(); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
515 vi4x3 = _mm_load_ss(i4 + 8); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
535 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk21c2, _mm_shuffle_ps(vi4x3, vi4x3, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
D3x3s2p1c3x4-neonfma-2x2.c235 const float32x4_t vi4x3 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
254 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c2, vi4x3, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
275 vo1x1 = vfmaq_laneq_f32(vo1x1, vk22c0, vi4x3, 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
296 vo1x1 = vfmaq_laneq_f32(vo1x1, vk22c1, vi4x3, 2); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
317 vo1x1 = vfmaq_laneq_f32(vo1x1, vk22c2, vi4x3, 3); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
323 vi4x0 = vi4x3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
508 float32x4_t vi4x3 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2() local
515 vi4x3 = vld1q_lane_f32(i4 + 8, vi4x3, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
535 vo1x1 = vfmaq_laneq_f32(vo1x1, vk21c2, vi4x3, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neonfma_2x2()
D3x3s2p1c3x4-neon-2x2.c235 const float32x4_t vi4x3 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
254 vo1x1 = vmlaq_lane_f32(vo1x1, vk21c2, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
275 vo1x1 = vmlaq_lane_f32(vo1x1, vk22c0, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
296 vo1x1 = vmlaq_lane_f32(vo1x1, vk22c1, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
317 vo1x1 = vmlaq_lane_f32(vo1x1, vk22c2, vget_high_f32(vi4x3), 1); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
323 vi4x0 = vi4x3; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
506 float32x4_t vi4x3 = vmovq_n_f32(0.0f); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2() local
513 vi4x3 = vld1q_lane_f32(i4 + 8, vi4x3, 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
533 vo1x1 = vmlaq_lane_f32(vo1x1, vk21c2, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__neon_2x2()
/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p1c3x4-neon-2x2.c237 const float32x4_t vi4x3 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2() local
259 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk21c2x0123, vget_low_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
283 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c0x0123, vget_low_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
307 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c1x0123, vget_high_f32(vi4x3), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
331 vo1x1c0123 = vmlaq_lane_f32(vo1x1c0123, vk22c2x0123, vget_high_f32(vi4x3), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()
337 vi4x0 = vi4x3; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x4__neon_2x2()

12