Home
last modified time | relevance | path

Searched refs:vout0x0123456789ABCDEF (Results 1 – 25 of 68) sorted by relevance

123

/external/XNNPACK/src/qs8-igemm/gen/
D1x16c4-minmax-neondot.c132 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local
137 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot() local
142 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
144 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
147 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
155 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
158 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c4__neondot()
D1x16c2-minmax-neon-mull-padal-dup.c194 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
199 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
204 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
206 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
209 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
217 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
220 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
D1x16c8-minmax-neon-mull-padal.c199 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
204 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal() local
209 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
211 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
214 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
222 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
225 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16-minmax-neon-mlal-lane.c260 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane() local
265 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane() local
270 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
272 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
275 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
283 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
286 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mlal_lane()
D1x16-minmax-neon-mull-addw-dup.c257 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup() local
262 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup() local
267 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
269 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
272 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
280 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
283 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16__neon_mull_addw_dup()
D1x16c16-minmax-neon-mlal-padal.c215 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
220 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
225 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
227 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
230 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
238 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
241 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D4x16c4-minmax-neondot.c252 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local
266 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot() local
277 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
282 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
288 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
299 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
307 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_igemm_minmax_ukernel_4x16c4__neondot()
D1x16c8-minmax-neon-mlal-padal.c288 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
293 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
298 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
300 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
303 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
311 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
314 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c2-minmax-neon-mull-padal-dup.c278 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local
286 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local
293 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
296 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
300 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
309 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
313 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_igemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
D1x16c2-minmax-neon-mlal-padal-dup.c282 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
287 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
292 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
294 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
297 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
305 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
308 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
D2x16c8-minmax-neon-mull-padal.c310 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
318 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal() local
325 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
328 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
332 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
341 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
345 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_igemm_minmax_ukernel_2x16c8__neon_mull_padal()
D1x16c8-minmax-avx512skx.c123 …const __m128i vout0x0123456789ABCDEF = _mm_shuffle_epi8(vout0x084C2A6E195D3B7F, _mm_set_epi8(15, 7… in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx() local
126 _mm_storeu_si128((__m128i*) c0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
137 _mm_mask_storeu_epi8(c0, vmask, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_1x16c8__avx512skx()
D2x16-minmax-neon-mlal-lane.c350 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() local
358 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane() local
365 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
368 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
372 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
381 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
385 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_igemm_minmax_ukernel_2x16__neon_mlal_lane()
/external/XNNPACK/src/qs8-gemm/gen/
D1x16c4-minmax-neondot.c122 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local
127 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot() local
132 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
134 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
138 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
148 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
151 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c4__neondot()
D1x16c8-minmax-neon-mull-padal.c185 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
190 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal() local
195 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
197 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
200 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
208 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
211 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mull_padal()
D1x16c2-minmax-neon-mull-padal-dup.c180 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
185 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup() local
190 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
192 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
195 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
203 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
206 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mull_padal_dup()
D1x16-minmax-neon-mull-addw-dup.c243 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup() local
248 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup() local
253 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
255 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
258 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
266 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
269 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mull_addw_dup()
D1x16-minmax-neon-mlal-lane.c247 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane() local
252 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane() local
257 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
259 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
262 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
270 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
273 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16__neon_mlal_lane()
D1x16c16-minmax-neon-mlal-padal.c201 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
206 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal() local
211 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
213 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
216 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
224 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
227 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c16__neon_mlal_padal()
D4x16c4-minmax-neondot.c236 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local
250 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot() local
258 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
263 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
270 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
289 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
296 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_gemm_minmax_ukernel_4x16c4__neondot()
D1x16c8-minmax-neon-mlal-padal.c274 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
279 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal() local
284 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
286 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
289 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
297 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
300 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__neon_mlal_padal()
D2x16c2-minmax-neon-mull-padal-dup.c262 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local
270 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup() local
276 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
279 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
283 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
294 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
298 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_gemm_minmax_ukernel_2x16c2__neon_mull_padal_dup()
D1x16c2-minmax-neon-mlal-padal-dup.c268 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
273 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup() local
278 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
280 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
283 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
291 int8x8_t vout0x01234567 = vget_low_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
294 vout0x01234567 = vget_high_s8(vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c2__neon_mlal_padal_dup()
D2x16c8-minmax-neon-mull-padal.c294 int8x16_t vout0x0123456789ABCDEF = vqmovn_high_s16(vqmovn_s16(vacc0x01234567), vacc0x89ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
302 …int8x16_t vout0x0123456789ABCDEF = vcombine_s8(vqmovn_s16(vacc0x01234567), vqmovn_s16(vacc0x89ABCD… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal() local
308 vout0x0123456789ABCDEF = vmaxq_s8(vout0x0123456789ABCDEF, voutput_min); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
311 vout0x0123456789ABCDEF = vminq_s8(vout0x0123456789ABCDEF, voutput_max); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
315 vst1q_s8(c0 + 0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
326 …int8x16_t vout0x01234567_1x01234567 = vcombine_s8(vget_low_s8(vout0x0123456789ABCDEF), vget_low_s8… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
330 …vout0x01234567_1x01234567 = vcombine_s8(vget_high_s8(vout0x0123456789ABCDEF), vget_high_s8(vout1x0… in xnn_qs8_gemm_minmax_ukernel_2x16c8__neon_mull_padal()
D1x16c8-minmax-avx512skx.c112 …const __m128i vout0x0123456789ABCDEF = _mm_shuffle_epi8(vout0x084C2A6E195D3B7F, _mm_set_epi8(15, 7… in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx() local
115 _mm_storeu_si128((__m128i*) c0, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()
126 _mm_mask_storeu_epi8(c0, vmask, vout0x0123456789ABCDEF); in xnn_qs8_gemm_minmax_ukernel_1x16c8__avx512skx()

123