Home
last modified time | relevance | path

Searched refs:vo0 (Results 1 – 25 of 340) sorted by relevance

12345678910>>...14

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc2.c110 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local
111 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
153 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2() local
154 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
157 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
162 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
165 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc2()
D3x3p1-minmax-wasmsimd-x86-splat-1x4.c109 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local
110 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
151 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4() local
152 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
155 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
158 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
160 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
163 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4.c118 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local
119 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
153 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4() local
154 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
157 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
167 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4()
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc4.c112 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local
113 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
115 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
157 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4() local
158 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
161 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
169 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc4()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c121 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local
122 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
124 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
159 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4() local
160 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
163 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
167 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
170 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
173 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc4()
D3x3p1-minmax-wasmsimd-x86-splat-1x4-acc3.c111 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local
112 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
114 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
155 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3() local
156 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
159 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
167 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_splat_1x4_acc3()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c120 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
121 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
123 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
157 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3() local
158 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
161 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
165 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
168 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
171 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc3()
D3x3p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c119 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local
120 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
122 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
155 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2() local
156 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
159 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
169 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_x86_loadsplat_1x4_acc2()
D3x3s2p1-minmax-wasmsimd-x86-splat-1x4.c115 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local
116 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
118 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
162 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4() local
163 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
167 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
170 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
172 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
175 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4()
D3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc2.c116 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local
117 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
119 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
164 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2() local
165 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
169 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
172 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
174 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
177 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc2()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc3.c127 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local
128 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
130 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
176 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3() local
177 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
181 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
184 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
186 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
189 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc3()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc2.c126 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local
127 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
129 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
174 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2() local
175 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
179 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
182 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
184 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
187 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc2()
D3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc4.c118 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local
119 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
121 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
168 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4() local
169 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
173 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
176 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
178 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
181 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc4()
D3x3s2p1-minmax-wasmsimd-x86-splat-1x4-acc3.c117 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local
118 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
120 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
166 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3() local
167 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
171 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
174 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
176 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
179 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_1x4_acc3()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4.c125 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local
126 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
128 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
172 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4() local
173 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
177 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
180 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
182 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
185 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-1x4-acc4.c128 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() local
129 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
131 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
178 v128_t vo0 = wasm_v128_bitselect(vmin, vo0p0, wasm_f32x4_lt(vo0p0, vmin)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4() local
179 vo0 = wasm_v128_bitselect(vo0, vmax, wasm_f32x4_le(vo0, vmax)); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
183 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
186 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
188 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
191 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_1x4_acc4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc2.c119 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local
120 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
122 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
155 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2() local
156 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
159 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
163 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
169 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc2()
D3x3p1-minmax-wasmsimd-arm-splat-1x4.c109 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local
110 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
112 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
151 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4() local
152 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
155 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
158 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
160 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
163 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4()
D3x3p1-minmax-ssse3-1x4.c111 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local
113 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
115 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
148 __m128 vo0 = _mm_max_ps(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4() local
150 vo0 = _mm_min_ps(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
153 _mm_storeu_ps(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
157 _mm_storel_pi((__m64*) o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
160 vo0 = _mm_movehl_ps(vo0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
163 _mm_store_ss(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__ssse3_1x4()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4.c118 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local
119 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
121 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
153 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4() local
154 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
157 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
161 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
167 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc2.c110 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local
111 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
113 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
153 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2() local
154 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
157 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
160 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
162 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
165 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc2()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc4.c112 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
113 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
115 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
157 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4() local
158 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
161 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
164 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
166 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
169 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc4()
D3x3p1-minmax-wasmsimd-arm-splat-1x4-acc3.c111 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
112 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
114 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
155 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3() local
156 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
159 wasm_v128_store(o0, vo0); o0 += 4; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
162 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); o0 += 2; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
164 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
167 *o0 = wasm_f32x4_extract_lane(vo0, 0); o0 += 1; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_splat_1x4_acc3()
D3x3p1-minmax-wasmsimd-arm-loadsplat-1x4-acc3.c120 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local
121 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
123 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
157 v128_t vo0 = wasm_f32x4_max(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3() local
158 vo0 = wasm_f32x4_min(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
161 wasm_v128_store(o0, vo0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
165 *((double*) o0) = wasm_f64x2_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
168 vo0 = wasm_v32x4_shuffle(vo0, vo0, 2, 3, 0, 1); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
171 *o0 = wasm_f32x4_extract_lane(vo0, 0); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__wasmsimd_arm_loadsplat_1x4_acc3()
D3x3p1-minmax-scalar-1x1-acc3.c94 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
96 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
98 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
114 float vo0 = math_max_f32(vo0p0, vmin); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3() local
116 vo0 = math_min_f32(vo0, vmax); in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()
118 *o0++ = vo0; in xnn_f32_dwconv2d_chw_ukernel_3x3p1__scalar_1x1_acc3()

12345678910>>...14