Home
last modified time | relevance | path

Searched refs:vi4x1 (Results 1 – 25 of 64) sorted by relevance

123

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D5x5p2-minmax-scalar-2x1.c95 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1() local
137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
147 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
148 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
232 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
242 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
243 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
250 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
314 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1()
[all …]
D5x5p2-minmax-scalar-3x1.c102 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
153 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
165 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
167 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
169 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
177 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
280 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
292 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
294 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
296 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
[all …]
D5x5p2-minmax-scalar-1x1-acc4.c88 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4() local
121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
127 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
187 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
193 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
199 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
243 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc4()
D5x5p2-minmax-scalar-1x1-acc2.c88 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2() local
121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
127 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
185 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
191 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
197 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
239 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc2()
D5x5p2-minmax-scalar-1x1-acc3.c88 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3() local
121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
127 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
186 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
192 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
198 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
241 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc3()
D5x5p2-minmax-scalar-1x1.c88 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1() local
121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
127 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
184 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
190 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
196 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
237 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1()
D3x3p1-minmax-scalar-5x1.c93 float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
127 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
135 vo4p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
139 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
143 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
151 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
213 vo4p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
217 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
221 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D5x5p2-minmax-scalar-3x1-acc2.c102 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
153 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
165 vo2p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
167 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
169 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
177 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
283 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
295 vo2p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
297 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
299 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
[all …]
D5x5p2-minmax-scalar-2x1-acc3.c95 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3() local
137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
147 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
148 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
236 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
246 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
247 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
254 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
322 vo1p2 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc3()
[all …]
D5x5p2-minmax-scalar-2x1-acc2.c95 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2() local
137 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
147 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
148 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
155 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
234 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
244 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
245 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
252 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
318 vo1p0 += vi4x1 * vk31; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_2x1_acc2()
[all …]
D5x5p2-minmax-scalar-1x1-acc5.c88 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5() local
121 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
127 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
133 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
188 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
194 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
200 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
245 vo0p4 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_1x1_acc5()
D3x3p1-minmax-scalar-6x1.c100 float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
139 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
148 vo4p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
153 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
158 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
167 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
239 vo4p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
244 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
249 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
D3x3p1-minmax-scalar-4x1.c86 float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
115 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
125 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
128 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
135 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
190 vo3p0 += vi4x1 * vk11; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
193 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D5x5s2p2-minmax-scalar-3x1.c121 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
196 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
201 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
206 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
214 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
320 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
325 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
330 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
397 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
402 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
[all …]
D5x5s2p2-minmax-scalar-3x1-acc2.c121 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
196 vo2p1 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
201 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
206 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
214 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
323 vo2p1 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
328 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
333 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
403 vo2p1 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
408 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
[all …]
D3x3s2p1-minmax-scalar-3x1.c98 const float vi4x1 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
129 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
134 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
175 const float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
191 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
196 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D5x5s2p2-minmax-scalar-2x1.c109 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1() local
170 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
173 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
260 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
263 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
314 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
317 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1()
D5x5s2p2-minmax-scalar-2x1-acc2.c109 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2() local
170 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
173 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
262 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
265 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
318 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
321 vo0p1 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc2()
D5x5s2p2-minmax-scalar-2x1-acc3.c109 float vi4x1 = 0.0f; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3() local
170 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
173 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
180 vi4x1 = vi4x3; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
264 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
267 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
322 vo1p1 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
325 vo0p0 += vi4x1 * vk41; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_2x1_acc3()
D3x3p1-minmax-scalar-3x1.c79 float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
103 vi4x0 = vi4x1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
113 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
119 vi4x1 = vi4x2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
165 vo2p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3s2p1-minmax-scalar-4x1.c110 const float vi4x1 = i4[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
150 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
157 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
207 const float vi4x1 = *i4++; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
228 vo2p0 += vi4x1 * vk01; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
235 vo1p0 += vi4x1 * vk21; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
/external/XNNPACK/src/f32-conv-hwc2chw/
D3x3s2p1c3x4-wasmsimd-2x2.c117 const v128_t vi4x1 = wasm_v128_load(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
136 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
204 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
225 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x1, vi4x1, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
253 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c2, wasm_v32x4_shuffle(vi4x1, vi4x1, 2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
274 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk22c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
365 v128_t vi4x1 = wasm_v128_load(i4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2() local
389 …vo1x1 = wasm_f32x4_add(vo1x1, wasm_f32x4_mul(vk20c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
469 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c0, wasm_v32x4_shuffle(vi4x1, vi4x1, 0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
496 …vo1x0 = wasm_f32x4_add(vo1x0, wasm_f32x4_mul(vk21c1, wasm_v32x4_shuffle(vi4x1, vi4x1, 1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__wasmsimd_2x2()
[all …]
D3x3s2p1c3x4-sse-2x2.c116 const __m128 vi4x1 = _mm_loadu_ps(i4); i4 += 4; in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
135 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
203 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
224 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
252 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c2, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(2, 2, 2, 2))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
273 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk22c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
365 __m128 vi4x1 = _mm_loadu_ps(i4); in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2() local
389 …vo1x1 = _mm_add_ps(vo1x1, _mm_mul_ps(vk20c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(3, 3, 3, 3))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
469 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c0, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(0, 0, 0, 0))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
496 …vo1x0 = _mm_add_ps(vo1x0, _mm_mul_ps(vk21c1, _mm_shuffle_ps(vi4x1, vi4x1, _MM_SHUFFLE(1, 1, 1, 1))… in xnn_f32_conv_hwc2chw_ukernel_3x3s2p1c3x4__sse_2x2()
[all …]
/external/XNNPACK/src/f32-conv-hwc/gen/
D3x3s2p1c3x8-neon-2x1.c173 const float32x4_t vi4x1 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1() local
195 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c0x0123, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
197 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c0x4567, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
219 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c1x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
221 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c1x4567, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
243 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
245 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk21c2x4567, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
267 vo1c0123 = vmlaq_lane_f32(vo1c0123, vk22c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
269 vo1c4567 = vmlaq_lane_f32(vo1c4567, vk22c0x4567, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
330 vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neon_2x1()
[all …]
D3x3s2p1c3x8-neonfma-2x1.c175 const float32x4_t vi4x1 = vld1q_f32(i4); i4 += 4; in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1() local
197 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c0x0123, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
199 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c0x4567, vget_low_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
221 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c1x0123, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
223 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c1x4567, vget_low_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
245 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk21c2x0123, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
247 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk21c2x4567, vget_high_f32(vi4x1), 0); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
269 vo1c0123 = vfmaq_lane_f32(vo1c0123, vk22c0x0123, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
271 vo1c4567 = vfmaq_lane_f32(vo1c4567, vk22c0x4567, vget_high_f32(vi4x1), 1); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
332 vi4x0 = vcombine_f32(vget_high_f32(vi4x1), vi4x2); in xnn_f32_conv_hwc_ukernel_3x3s2p1c3x8__neonfma_2x1()
[all …]

123