/external/XNNPACK/src/f32-dwconv2d-chw/gen/ |
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 110 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local 111 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 153 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local 154 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 157 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 162 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() 165 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4.c | 109 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local 110 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 151 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local 152 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 155 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 158 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 160 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() 163 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4.c | 118 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local 119 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 153 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local 154 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 157 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() 167 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc4.c | 112 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local 113 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 115 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 157 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local 158 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 161 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() 169 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c | 121 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local 122 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 124 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 159 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local 160 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 163 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 167 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 170 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() 173 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-x86-splat-1x4-acc3.c | 111 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local 112 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 114 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 155 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local 156 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 159 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() 167 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c | 120 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local 121 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 123 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 157 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local 158 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 161 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 165 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 168 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() 171 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c | 119 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local 120 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 122 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 155 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local 156 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 159 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() 169 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c | 115 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 116 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 118 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 162 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local 163 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 167 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 170 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 172 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() 175 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c | 116 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 117 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 119 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 164 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local 165 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 169 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 172 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 174 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() 177 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c | 127 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local 128 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 130 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 176 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local 177 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 181 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 184 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 186 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() 189 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c | 126 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local 127 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 129 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 174 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local 175 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 179 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 182 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 184 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() 187 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc4.c | 118 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local 119 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 121 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 168 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local 169 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 173 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 176 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 178 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() 181 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc3.c | 117 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local 118 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 120 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 166 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local 167 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 171 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 174 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 176 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() 179 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4.c | 125 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local 126 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 128 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 172 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local 173 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 177 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 180 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 182 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() 185 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
|
D | 3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c | 128 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() local 129 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 131 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 178 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() local 179 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 183 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 186 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 188 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() 191 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c | 119 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local 120 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 122 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 155 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local 156 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 159 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() 169 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4.c | 109 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local 110 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 151 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local 152 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 155 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 158 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 160 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() 163 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
|
D | 3x3p1-minmax-ssse3-1x4.c | 111 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local 113 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 115 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 148 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local 150 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 153 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 157 _mm_storel_pi((__m64*) o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 160 vo0 = _mm_movehl_ps(vo0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() 163 _mm_store_ss(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4.c | 118 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local 119 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 153 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local 154 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 157 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() 167 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc2.c | 110 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local 111 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 153 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local 154 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 157 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 162 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() 165 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c | 112 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local 113 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 115 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 157 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local 158 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 161 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() 169 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
|
D | 3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c | 111 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local 112 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 114 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 155 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local 156 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 159 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() 167 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
|
D | 3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c | 120 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local 121 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 123 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 157 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local 158 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 161 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 165 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 168 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() 171 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
|
D | 3x3p1-minmax-scalar-1x1-acc3.c | 94 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local 96 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 98 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 114 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local 116 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() 118 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
|