Home
last modified time | relevance | path

Searched refs:vmask_even (Results 1 – 25 of 135) sorted by relevance

123456

/external/XNNPACK/src/f32-dwconv2d-chw/gen/
D3x3s2p1-minmax-neonfma-4x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4() local
217 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
219 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
221 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
223 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
225 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
227 …const float32x4_t vi5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
229 …const float32x4_t vi6x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
231 …const float32x4_t vi7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
233 …const float32x4_t vi8x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi8… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_4x4()
D3x3s2p1-minmax-neon-4x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4() local
217 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
219 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
221 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
223 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
225 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
227 …const float32x4_t vi5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
229 …const float32x4_t vi6x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
231 …const float32x4_t vi7x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi7… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
233 …const float32x4_t vi8x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi8… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_4x4()
D3x3s2p1-minmax-neonfma-3x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4() local
183 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
185 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
187 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
189 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
191 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
193 …const float32x4_t vi5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
195 …const float32x4_t vi6x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_3x4()
D3x3s2p1-minmax-neon-3x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4() local
183 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
185 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
187 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
189 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
191 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
193 …const float32x4_t vi5x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi5… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
195 …const float32x4_t vi6x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi6… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_3x4()
D3x3s2p1-minmax-neonfma-2x4-acc2.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2() local
151 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
153 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
155 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
157 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
159 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4_acc2()
D3x3s2p1-minmax-neon-2x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4() local
149 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
151 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
153 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
155 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
157 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_2x4()
D3x3s2p1-minmax-neonfma-2x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4() local
149 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
151 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
153 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
155 …const float32x4_t vi3x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi3… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
157 …const float32x4_t vi4x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi4… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_2x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-4x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4() local
275 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
277 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
279 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
281 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
283 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
285 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
287 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
289 …const v128_t vi7x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
291 …const v128_t vi8x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_4x4()
D3x3s2p1-minmax-neonfma-1x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4() local
114 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
116 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
118 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4()
D3x3s2p1-minmax-neonfma-1x4-acc2.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2() local
115 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
117 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
119 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc2()
D3x3s2p1-minmax-neonfma-1x4-acc4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4() local
117 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
119 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
121 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc4()
D3x3s2p1-minmax-neon-1x4-acc2.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2() local
115 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
117 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
119 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc2()
D3x3s2p1-minmax-neon-1x4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4() local
114 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
116 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
118 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4()
D3x3s2p1-minmax-neon-1x4-acc3.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3() local
116 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
118 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
120 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc3()
D3x3s2p1-minmax-neonfma-1x4-acc3.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3() local
116 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
118 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
120 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neonfma_1x4_acc3()
D3x3s2p1-minmax-neon-1x4-acc4.c34 const uint32x4_t vmask_even = vld1q_u32(params->neon.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4() local
117 …const float32x4_t vi0x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
119 …const float32x4_t vi1x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi1… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
121 …const float32x4_t vi2x8ACE = vreinterpretq_f32_u32(vandq_u32(vmask_even, vreinterpretq_u32_f32(vi2… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__neon_1x4_acc4()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-3x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4() local
231 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
233 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
235 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
237 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
239 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
241 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
243 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_3x4()
D3x3s2p1-minmax-wasmsimd-arm-loadsplat-3x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4() local
231 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
233 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
235 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
237 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
239 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
241 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
243 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_loadsplat_3x4()
D3x3s2p1-minmax-wasmsimd-arm-splat-3x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4() local
221 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
223 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
225 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
227 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
229 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
231 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
233 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_3x4()
D3x3s2p1-minmax-sse-6x4.c34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4() local
359 …const __m128 vi0x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
361 …const __m128 vi1x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi1x89AB, vi1xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
363 …const __m128 vi2x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi2x89AB, vi2xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
365 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
367 …const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
369 …const __m128 vi5x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
371 …const __m128 vi6x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
373 …const __m128 vi7x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi7x89AB, vi7xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
375 …const __m128 vi8x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi8x89AB, vi8xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_6x4()
[all …]
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-4x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4() local
275 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
277 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
279 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
281 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
283 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
285 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
287 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
289 …const v128_t vi7x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
291 …const v128_t vi8x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_4x4()
D3x3s2p1-minmax-wasmsimd-arm-splat-4x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4() local
265 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
267 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
269 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
271 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
273 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
275 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
277 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
279 …const v128_t vi7x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
281 …const v128_t vi8x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_arm_splat_4x4()
D3x3s2p1-minmax-wasmsimd-x86-splat-4x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4() local
265 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
267 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
269 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
271 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
273 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
275 …const v128_t vi5x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi5x89AB, vi5xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
277 …const v128_t vi6x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi6x89AB, vi6xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
279 …const v128_t vi7x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi7x89AB, vi7xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
281 …const v128_t vi8x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi8x89AB, vi8xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_splat_4x4()
D3x3s2p1-minmax-sse-4x4.c34 const __m128 vmask_even = _mm_load_ps((const float*) params->sse.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4() local
269 …const __m128 vi0x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi0x89AB, vi0xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
271 …const __m128 vi1x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi1x89AB, vi1xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
273 …const __m128 vi2x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi2x89AB, vi2xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
275 …const __m128 vi3x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi3x89AB, vi3xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
277 …const __m128 vi4x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi4x89AB, vi4xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
279 …const __m128 vi5x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi5x89AB, vi5xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
281 …const __m128 vi6x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi6x89AB, vi6xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
283 …const __m128 vi7x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi7x89AB, vi7xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
285 …const __m128 vi8x8ACE = _mm_and_ps(vmask_even, _mm_shuffle_ps(vi8x89AB, vi8xCDEF, _MM_SHUFFLE(2, 0… in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__sse_4x4()
D3x3s2p1-minmax-wasmsimd-x86-loadsplat-2x4.c35 const v128_t vmask_even = wasm_v128_load(params->scalar.mask_even); in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4() local
187 …const v128_t vi0x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi0x89AB, vi0xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
189 …const v128_t vi1x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi1x89AB, vi1xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
191 …const v128_t vi2x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi2x89AB, vi2xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
193 …const v128_t vi3x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi3x89AB, vi3xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()
195 …const v128_t vi4x8ACE = wasm_v128_and(vmask_even, wasm_v32x4_shuffle(vi4x89AB, vi4xCDEF, 0, 2, 4, … in xnn_f32_dwconv2d_chw_ukernel_3x3s2p1__wasmsimd_x86_loadsplat_2x4()

123456