Home
last modified time | relevance | path

Searched refs:vo3 (Results 1 – 25 of 60) sorted by relevance

123

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-wasmsimd-x86-loadsplat-4x4.c193 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
197 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
199 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
276 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4() local
280 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
283 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
293 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
305 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
308 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_4x4()
D3x3p1-minmax-scalar-4x1.c155 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
160 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
162 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
200 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1() local
205 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
207 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_4x1()
D3x3p1-minmax-wasmsimd-x86-splat-4x4.c181 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local
185 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
187 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
271 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4() local
275 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
278 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
284 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
292 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
295 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_4x4()
D3x3p1-minmax-scalar-5x1.c175 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
181 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
185 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
229 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1() local
235 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
239 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_5x1()
D3x3s2p1-minmax-scalar-4x1.c188 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
193 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
195 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
243 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1() local
248 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
250 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__scalar_4x1()
D3x3p1-minmax-ssse3-4x4.c186 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
191 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
193 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
271 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4() local
276 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
279 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
289 _mm_storel_pi((__m64*) o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
301 vo3 = _mm_movehl_ps(vo3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
304 _mm_store_ss(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_4x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-4x4.c193 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
197 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
199 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
276 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4() local
280 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
283 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
293 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
305 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
308 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_4x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-5x4.c217 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
222 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
227 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
316 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4() local
321 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
327 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
339 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
351 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
357 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_5x4()
D3x3p1-minmax-sse-4x4.c236 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
241 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
243 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
359 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4() local
364 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
367 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
377 _mm_storel_pi((__m64*) o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
389 vo3 = _mm_movehl_ps(vo3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
392 _mm_store_ss(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_4x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-5x4.c217 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
222 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
227 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
316 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4() local
321 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
327 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
339 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
351 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
357 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_5x4()
D3x3p1-minmax-wasmsimd-arm-splat-4x4.c181 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4() local
185 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
187 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
271 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4() local
275 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
278 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
284 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
292 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
295 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_4x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-6x4.c241 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
247 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
255 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
356 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4() local
362 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
371 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
385 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
397 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
406 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_6x4()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c237 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
241 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
243 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
353 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
357 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
361 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
367 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
375 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
378 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
D3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c227 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
231 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
233 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
343 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
347 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
351 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
357 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
365 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
368 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
D3x3p1-minmax-wasmsimd-x86-splat-5x4.c204 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local
209 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
213 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
310 v128_t vo3 = wasm_v128_bitselect(vmin, vo3p0, wasm_f32x4_lt(vo3p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4() local
315 vo3 = wasm_v128_bitselect(vo3, vmax, wasm_f32x4_le(vo3, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
320 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
327 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
335 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
340 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_5x4()
D3x3p1-minmax-scalar-6x1.c195 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
202 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
208 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
258 float vo3 = math_max_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1() local
265 vo3 = math_min_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
271 *o3++ = vo3; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_6x1()
D3x3p1-minmax-ssse3-5x4.c210 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local
216 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
221 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
311 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4() local
317 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
323 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
335 _mm_storel_pi((__m64*) o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
347 vo3 = _mm_movehl_ps(vo3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
353 _mm_store_ss(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_5x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c237 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
241 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
243 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
353 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
357 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
361 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
367 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
375 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
378 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
D3x3p1-minmax-sse-5x4.c268 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
274 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
279 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
413 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4() local
419 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
425 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
437 _mm_storel_pi((__m64*) o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
449 vo3 = _mm_movehl_ps(vo3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
455 _mm_store_ss(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__sse_5x4()
D3x3p1-minmax-neonfma-4x4.c178 float32x4_t vo3 = vmaxq_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() local
183 vo3 = vminq_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
185 vst1q_f32(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
270 float32x4_t vo3 = vmaxq_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4() local
275 vo3 = vminq_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
278 vst1q_f32(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
286 float32x2_t vo3_lo = vget_low_f32(vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
296 vo3_lo = vget_high_f32(vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neonfma_4x4()
D3x3p1-minmax-neon-4x4.c178 float32x4_t vo3 = vmaxq_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() local
183 vo3 = vminq_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
185 vst1q_f32(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
270 float32x4_t vo3 = vmaxq_f32(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4() local
275 vo3 = vminq_f32(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
278 vst1q_f32(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
286 float32x2_t vo3_lo = vget_low_f32(vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
296 vo3_lo = vget_high_f32(vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__neon_4x4()
D3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c227 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
231 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
233 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
343 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
347 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
351 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
357 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
365 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
368 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-6x4.c241 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
247 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
255 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
356 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4() local
362 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
371 wasm_v128_store(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
385 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
397 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
406 *o3 = wasm_f32x4_extract_lane(vo3, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_6x4()
D3x3p1-minmax-wasmsimd-arm-splat-5x4.c204 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local
209 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
213 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
310 v128_t vo3 = wasm_f32x4_max(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4() local
315 vo3 = wasm_f32x4_min(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
320 wasm_v128_store(o3, vo3); o3 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
327 *((double*) o3) = wasm_f64x2_extract_lane(vo3, 0); o3 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
335 vo3 = wasm_v32x4_shuffle(vo3, vo3, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
340 *o3 = wasm_f32x4_extract_lane(vo3, 0); o3 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_5x4()
D3x3p1-minmax-ssse3-6x4.c234 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
241 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
249 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
351 __m128 vo3 = _mm_max_ps(vo3p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4() local
358 vo3 = _mm_min_ps(vo3, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
367 _mm_storeu_ps(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
381 _mm_storel_pi((__m64*) o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
393 vo3 = _mm_movehl_ps(vo3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()
402 _mm_store_ss(o3, vo3); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_6x4()

123