Home
last modified time | relevance | path

Searched refs:vbias (Results 1 – 25 of 252) sorted by relevance

1234567891011

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-scalar-6x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
116 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
117 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
118 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
119 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
120 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
121 float vo5p0 = vbias + vi5x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
216 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
217 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
218 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
[all …]
D3x3p1-minmax-scalar-5x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
107 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
108 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
109 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
110 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
111 float vo4p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
193 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
194 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
195 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
196 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
[all …]
D3x3p1-minmax-scalar-4x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
98 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
99 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
100 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
101 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
170 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
171 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
172 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
173 float vo3p0 = vbias + vi3x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3s2p1-minmax-scalar-4x1.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
116 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
117 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
118 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
119 float vo3p0 = vbias + vi6x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
213 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
214 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
215 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
216 float vo3p0 = vbias + vi6x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D3x3p1-minmax-scalar-3x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1() local
89 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
90 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
91 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
147 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
148 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
149 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_3x1()
D3x3s2p1-minmax-scalar-3x1.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1() local
102 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
103 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
104 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
179 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
180 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
181 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_3x1()
D3x3p1-minmax-scalar-2x1-acc2.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2() local
80 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
126 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
127 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1_acc2()
D3x3p1-minmax-scalar-2x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1() local
80 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
81 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
124 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
125 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_2x1()
D3x3s2p1-minmax-scalar-2x1-acc2.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2() local
88 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
147 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
148 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1_acc2()
D3x3s2p1-minmax-scalar-2x1.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1() local
88 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
89 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
145 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
146 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_2x1()
D5x5p2-minmax-scalar-3x1.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1() local
133 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
260 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
261 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
262 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
365 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
366 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
367 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1()
D5x5s2p2-minmax-scalar-3x1.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1() local
168 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
169 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
302 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
303 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
304 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
379 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
380 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
381 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1()
D5x5p2-minmax-scalar-3x1-acc2.c34 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2() local
133 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
134 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
135 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
263 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
264 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
265 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
371 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
372 float vo1p0 = vbias + vi1x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
373 float vo2p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5p2__scalar_3x1_acc2()
D5x5s2p2-minmax-scalar-3x1-acc2.c35 const float vbias = weights[0]; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2() local
168 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
169 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
170 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
305 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
306 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
307 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
385 float vo0p0 = vbias + vi0x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
386 float vo1p0 = vbias + vi2x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
387 float vo2p0 = vbias + vi4x0 * vk00; in xnn_f32_dwconv2d_chw_ukernel_5x5s2p2__scalar_3x1_acc2()
D3x3p1-minmax-ssse3-6x4.c37 const __m128 vbias = _mm_load1_ps(weights); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
137 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
138 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
139 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
140 __m128 vo3p0 = _mm_add_ps(vbias, _mm_mul_ps(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
141 __m128 vo4p0 = _mm_add_ps(vbias, _mm_mul_ps(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
142 __m128 vo5p0 = _mm_add_ps(vbias, _mm_mul_ps(vi5x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
271 __m128 vo0p0 = _mm_add_ps(vbias, _mm_mul_ps(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
272 __m128 vo1p0 = _mm_add_ps(vbias, _mm_mul_ps(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
273 __m128 vo2p0 = _mm_add_ps(vbias, _mm_mul_ps(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c42 const v128_t vbias = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
144 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
145 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
146 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
147 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
148 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
149 v128_t vo5p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi5x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
277 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
278 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
279 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c42 const v128_t vbias = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
144 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
145 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
146 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
147 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
148 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
149 v128_t vo5p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi5x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
277 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
278 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
279 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
[all …]
D3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c42 const v128_t vbias = wasm_v32x4_shuffle(vw0123, vw0123, 0, 0, 0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
133 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
134 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
135 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
136 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
137 v128_t vo4p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi4x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
248 v128_t vo0p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi0x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
249 v128_t vo1p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi1x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
250 v128_t vo2p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi2x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
251 v128_t vo3p0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vi3x4567, vk01)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
[all …]
/external/XNNPACK/src/f32-vmulcaddc/gen/
Dc8-minmax-wasmsimd-arm-2x.c95 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() local
97 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
98 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
121 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x() local
123 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
124 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_arm_2x()
Dc8-minmax-wasmsimd-x86-2x.c95 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() local
97 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
98 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
121 const v128_t vbias = wasm_v128_load(w + 8); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x() local
123 vacc0 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc0)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
124 vacc1 = wasm_f32x4_add(vbias, wasm_f32x4_mul(vscale, vacc1)); in xnn_f32_vmulcaddc_minmax_ukernel_c8__wasmsimd_x86_2x()
Dc1-minmax-scalar-2x.c53 const float vbias = w[1]; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x() local
55 vacc0 = vacc0 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x()
56 vacc1 = vacc1 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__scalar_2x()
Dc1-minmax-wasm-2x.c53 const float vbias = w[1]; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x() local
55 vacc0 = vacc0 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x()
56 vacc1 = vacc1 * vscale + vbias; in xnn_f32_vmulcaddc_minmax_ukernel_c1__wasm_2x()
/external/XNNPACK/src/f32-vmulcaddc/
Dscalar.c.in66 const float vbias${ABC[C]} = w[${C + CHANNEL_TILE}];
70 vacc${M}x${ABC[C]} = vacc${M}x${ABC[C]} * vscale${ABC[C]} + vbias${ABC[C]};
94 const float vbias = w[${CHANNEL_TILE - 1}];
97 vacc${M} = vacc${M} * vscale + vbias;
118 const float vbias = w[1];
121 vacc${M} = vacc${M} * vscale + vbias;
/external/XNNPACK/src/qs8-gavgpool/gen/
D7x-minmax-wasmsimd-c24-acc2.c56 const v128_t vbias = wasm_v128_load(params->wasmsimd.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2() local
113 const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
114 const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
115 const v128_t vacc89AB = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
116 const v128_t vaccCDEF = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
117 const v128_t vaccGHIJ = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
118 const v128_t vaccKLMN = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
230 const v128_t vacc0123 = wasm_i32x4_add(vbias, wasm_i32x4_widen_low_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
231 const v128_t vacc4567 = wasm_i32x4_add(vbias, wasm_i32x4_widen_high_i16x8(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__wasmsimd_c24_acc2()
D7x-minmax-sse41-c24-acc2.c56 const __m128i vbias = _mm_load_si128((const __m128i*) params->sse2.bias); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2() local
113 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
114 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
115 const __m128i vacc89AB = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x89ABCDEF)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
116 …const __m128i vaccCDEF = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x89ABCDEF, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
117 const __m128i vaccGHIJ = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0xGHIJKLMN)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
118 …const __m128i vaccKLMN = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0xGHIJKLMN, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
222 const __m128i vacc0123 = _mm_add_epi32(vbias, _mm_cvtepi16_epi32(vacc0x01234567)); in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()
223 …const __m128i vacc4567 = _mm_add_epi32(vbias, _mm_unpackhi_epi16(vacc0x01234567, _mm_cmpgt_epi16(_… in xnn_qs8_gavgpool_minmax_ukernel_7x__sse41_c24_acc2()

1234567891011